binwang committed on
Commit ecb9582
1 Parent(s): 18bbc47
Files changed (2)
  1. all_results.json +40 -40
  2. app.py +5 -5
all_results.json CHANGED
@@ -32277,70 +32277,70 @@
 },
 "flores_ind2eng": {
   "prompt_1": {
-    "bleu_score": 0.07998569558291352
+    "bleu_score": 0.3087387231733152
   },
   "prompt_2": {
-    "bleu_score": 0.08178489772334997
+    "bleu_score": 0.3094226547039261
   },
   "prompt_3": {
-    "bleu_score": 0.08148612648063638
+    "bleu_score": 0.3061124934874166
   },
   "prompt_4": {
-    "bleu_score": 0.07978071248841677
+    "bleu_score": 0.30135340693301044
   },
   "prompt_5": {
-    "bleu_score": 0.08665723913254166
+    "bleu_score": 0.30791510943643785
   }
 },
 "flores_vie2eng": {
   "prompt_1": {
-    "bleu_score": 0.06595796158495684
+    "bleu_score": 0.24226557595813872
   },
   "prompt_2": {
-    "bleu_score": 0.0659317494666598
+    "bleu_score": 0.24374681205197152
   },
   "prompt_3": {
-    "bleu_score": 0.06631225923987717
+    "bleu_score": 0.23865746431889961
   },
   "prompt_4": {
-    "bleu_score": 0.06562445607881094
+    "bleu_score": 0.24343786296993222
   },
   "prompt_5": {
-    "bleu_score": 0.07261211813311091
+    "bleu_score": 0.2496790676198905
   }
 },
 "flores_zho2eng": {
   "prompt_1": {
-    "bleu_score": 0.0502826099860938
+    "bleu_score": 0.18741482916807534
   },
   "prompt_2": {
-    "bleu_score": 0.05134705353375384
+    "bleu_score": 0.18861522471729936
   },
   "prompt_3": {
-    "bleu_score": 0.051760820535713056
+    "bleu_score": 0.1828941675772202
   },
   "prompt_4": {
-    "bleu_score": 0.05136651836443981
+    "bleu_score": 0.18500544495397628
   },
   "prompt_5": {
-    "bleu_score": 0.05595680934355571
+    "bleu_score": 0.19088057936700595
   }
 },
 "flores_zsm2eng": {
   "prompt_1": {
-    "bleu_score": 0.08113017898863324
+    "bleu_score": 0.31040973391193794
   },
   "prompt_2": {
-    "bleu_score": 0.08240154342677156
+    "bleu_score": 0.31410450445911836
   },
   "prompt_3": {
-    "bleu_score": 0.08322691501291536
+    "bleu_score": 0.30742063457580054
   },
   "prompt_4": {
-    "bleu_score": 0.07956020114017891
+    "bleu_score": 0.2954984182513215
   },
   "prompt_5": {
-    "bleu_score": 0.08635159030424207
+    "bleu_score": 0.3059634141807576
   }
 },
 "mmlu": {
@@ -67059,70 +67059,70 @@
 },
 "flores_ind2eng": {
   "prompt_1": {
-    "bleu_score": 0.08681388844799684
+    "bleu_score": 0.011674358088733964
   },
   "prompt_2": {
-    "bleu_score": 0.0880334958080158
+    "bleu_score": 0.34299290022800966
   },
   "prompt_3": {
-    "bleu_score": 0.08805929258706634
+    "bleu_score": 0.34235818894094877
   },
   "prompt_4": {
-    "bleu_score": 0.08842607869136156
+    "bleu_score": 0.344471697570177
   },
   "prompt_5": {
-    "bleu_score": 0.08768127846496257
+    "bleu_score": 0.34330146854458155
   }
 },
 "flores_vie2eng": {
   "prompt_1": {
-    "bleu_score": 0.07138322514226816
+    "bleu_score": 0.004016316923669523
   },
   "prompt_2": {
-    "bleu_score": 0.07471863870389119
+    "bleu_score": 0.28858215451103547
   },
   "prompt_3": {
-    "bleu_score": 0.07467792001731594
+    "bleu_score": 0.2874460615046707
   },
   "prompt_4": {
-    "bleu_score": 0.0749366075388539
+    "bleu_score": 0.2895463893365964
   },
   "prompt_5": {
-    "bleu_score": 0.07514655189733327
+    "bleu_score": 0.28765593996471855
   }
 },
 "flores_zho2eng": {
   "prompt_1": {
-    "bleu_score": 0.05903468503840439
+    "bleu_score": 0.004745747326013472
   },
   "prompt_2": {
-    "bleu_score": 0.057043614692943063
+    "bleu_score": 0.21742919083139323
   },
   "prompt_3": {
-    "bleu_score": 0.05784040342624702
+    "bleu_score": 0.21718999888377416
   },
   "prompt_4": {
-    "bleu_score": 0.05727950339564555
+    "bleu_score": 0.2171201069019555
   },
   "prompt_5": {
-    "bleu_score": 0.057461566054743354
+    "bleu_score": 0.21609843798223957
   }
 },
 "flores_zsm2eng": {
   "prompt_1": {
-    "bleu_score": 0.08624501669818817
+    "bleu_score": 0.010737551256522686
   },
   "prompt_2": {
-    "bleu_score": 0.09009103541257128
+    "bleu_score": 0.35662624808916016
   },
   "prompt_3": {
-    "bleu_score": 0.09153674138343326
+    "bleu_score": 0.35860534258636234
   },
   "prompt_4": {
-    "bleu_score": 0.0910349620847283
+    "bleu_score": 0.35739510518617695
   },
   "prompt_5": {
-    "bleu_score": 0.08823269277227647
+    "bleu_score": 0.3485870508300006
   }
 },
 "mmlu": {
app.py CHANGED
@@ -2297,11 +2297,11 @@ with block:
     """)
 
 
-    with gr.TabItem("Reasoning"):
+    with gr.TabItem("General Reasoning"):
 
 
         # dataset 12:
-        with gr.TabItem("MMLU"):
+        with gr.TabItem("MMLU Subset"):
             with gr.TabItem("Zero Shot"):
                 with gr.TabItem("Overall"):
                     with gr.Row():
@@ -2355,7 +2355,7 @@ with block:
 
 
         # dataset 14:
-        with gr.TabItem("C_EVAL"):
+        with gr.TabItem("C_EVAL Subset"):
             with gr.TabItem("Zero Shot"):
                 with gr.TabItem("Overall"):
                     with gr.Row():
@@ -2408,7 +2408,7 @@ with block:
 
 
         # dataset 16:
-        with gr.TabItem("CMMLU"):
+        with gr.TabItem("CMMLU Subset"):
             with gr.TabItem("Zero Shot"):
                 with gr.TabItem("Overall"):
                     with gr.Row():
@@ -2622,7 +2622,7 @@ with block:
     with gr.TabItem("Emotion"):
 
         # dataset 18:
-        with gr.TabItem("ind_emotion"):
+        with gr.TabItem("Indonesian Emotion Classification"):
             with gr.TabItem("Zero Shot"):
                 with gr.TabItem("Overall"):
                     with gr.Row():
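These app.py hunks only rename tab labels ("Reasoning" to "General Reasoning", "MMLU" to "MMLU Subset", and so on); the surrounding code is Gradio's nested TabItem layout. Below is a self-contained sketch of that pattern with placeholder labels and data, illustrating the structure rather than reproducing the real app.

import gradio as gr
import pandas as pd

# Sketch of the nested-tab layout seen in app.py: a category tab, one tab per
# dataset, then Zero Shot / Overall sub-tabs holding a results table. The
# labels and dataframe contents here are placeholders, not the leaderboard's.
demo_df = pd.DataFrame({"Model": ["model-a", "model-b"], "Score": [0.31, 0.29]})

with gr.Blocks() as block:
    with gr.TabItem("General Reasoning"):
        with gr.TabItem("MMLU Subset"):
            with gr.TabItem("Zero Shot"):
                with gr.TabItem("Overall"):
                    with gr.Row():
                        gr.Dataframe(value=demo_df)

if __name__ == "__main__":
    block.launch()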