binwang committed on
Commit
8024fdd
•
1 Parent(s): 5da889a

add a few new datasets

Browse files
Files changed (1) hide show
  1. app.py +413 -0
app.py CHANGED
@@ -547,6 +547,275 @@ def get_data_ph_eval(eval_mode='zero_shot', fillna=True, rank=True):
547
  PH_EVAL_ZERO_SHOT = get_data_ph_eval(eval_mode="zero_shot")
548
  PH_EVAL_FIVE_SHOT = get_data_ph_eval(eval_mode="five_shot")
549
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
550
  # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
551
  # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
552
 
@@ -792,7 +1061,151 @@ with block:
792
  )
793
 
794
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
795
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
796
 
797
  gr.Markdown(r"""
798
 
 
547
  PH_EVAL_ZERO_SHOT = get_data_ph_eval(eval_mode="zero_shot")
548
  PH_EVAL_FIVE_SHOT = get_data_ph_eval(eval_mode="five_shot")
549
 
550
+
551
+ # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
552
+ # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
553
+
554
+
555
def get_data_sing2eng(eval_mode='zero_shot', fillna=True, rank=True):
    """Build the leaderboard table for the ``sing2eng`` dataset.

    For every model in ``MODEL_LIST`` the per-run results stored under
    ``ALL_RESULTS[model][eval_mode]['sing2eng']`` are reduced to the median
    of their ``bleu_score`` values.

    Args:
        eval_mode: Key selecting the evaluation setting inside
            ``ALL_RESULTS[model]``.
        fillna: When true, missing cells are replaced with empty strings.
        rank: When true, the frame is passed through
            ``add_rank(df, compute_average=True)`` before being returned.

    Returns:
        A ``pandas.DataFrame`` with one row per distinct ``"Model"`` value
        holding ``"Model"``, ``"Model Size (Params)"`` and ``"BLEU"``
        (plus whatever ``add_rank`` contributes when ``rank`` is true).
        ``"BLEU"`` is ``-1`` for models whose score could not be computed.
    """
    rows = []

    for model in MODEL_LIST:
        results_list = list(ALL_RESULTS[model][eval_mode]['sing2eng'].values())

        try:
            bleu_score = median([results['bleu_score'] for results in results_list])
        except Exception:
            # Best effort: a model whose runs cannot be reduced (e.g. a run
            # without 'bleu_score') gets the -1 sentinel instead of breaking
            # the whole table. Unlike a bare ``except:``, KeyboardInterrupt
            # and SystemExit still propagate.
            print(results_list)
            bleu_score = -1

        rows.append({
            "Model Size (Params)": MODEL_TO_SIZE.get(model, ""),
            "Model": make_clickable_model(model, link=ALL_RESULTS[model]["model_link"]),
            "BLEU": bleu_score,
        })

    df = pd.DataFrame(rows)

    # Collapse duplicate "Model" entries, keeping the first non-null value
    # of every other column.
    df = df.groupby("Model", as_index=False).first()

    # Put the 'Model' column first.
    cols = list(df.columns)
    cols.insert(0, cols.pop(cols.index("Model")))
    df = df[cols]

    if rank:
        df = add_rank(df, compute_average=True)

    if fillna:
        df = df.fillna("")

    return df
599
+
600
+
601
# Precomputed sing2eng leaderboard tables, one per evaluation mode.
SING2ENG_ZERO_SHOT, SING2ENG_FIVE_SHOT = (
    get_data_sing2eng(eval_mode=mode) for mode in ("zero_shot", "five_shot")
)
603
+
604
+ # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
605
+ # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
606
+
607
+
608
def get_data_flores_ind2eng(eval_mode='zero_shot', fillna=True, rank=True):
    """Build the leaderboard table for the ``flores_ind2eng`` dataset.

    For every model in ``MODEL_LIST`` the per-run results stored under
    ``ALL_RESULTS[model][eval_mode]['flores_ind2eng']`` are reduced to the
    median of their ``bleu_score`` values.

    Args:
        eval_mode: Key selecting the evaluation setting inside
            ``ALL_RESULTS[model]``.
        fillna: When true, missing cells are replaced with empty strings.
        rank: When true, the frame is passed through
            ``add_rank(df, compute_average=True)`` before being returned.

    Returns:
        A ``pandas.DataFrame`` with one row per distinct ``"Model"`` value
        holding ``"Model"``, ``"Model Size (Params)"`` and ``"BLEU"``
        (plus whatever ``add_rank`` contributes when ``rank`` is true).
        ``"BLEU"`` is ``-1`` for models whose score could not be computed.
    """
    rows = []

    for model in MODEL_LIST:
        results_list = list(ALL_RESULTS[model][eval_mode]['flores_ind2eng'].values())

        try:
            bleu_score = median([results['bleu_score'] for results in results_list])
        except Exception:
            # Best effort: a model whose runs cannot be reduced (e.g. a run
            # without 'bleu_score') gets the -1 sentinel instead of breaking
            # the whole table. Unlike a bare ``except:``, KeyboardInterrupt
            # and SystemExit still propagate.
            print(results_list)
            bleu_score = -1

        rows.append({
            "Model Size (Params)": MODEL_TO_SIZE.get(model, ""),
            "Model": make_clickable_model(model, link=ALL_RESULTS[model]["model_link"]),
            "BLEU": bleu_score,
        })

    df = pd.DataFrame(rows)

    # Collapse duplicate "Model" entries, keeping the first non-null value
    # of every other column.
    df = df.groupby("Model", as_index=False).first()

    # Put the 'Model' column first.
    cols = list(df.columns)
    cols.insert(0, cols.pop(cols.index("Model")))
    df = df[cols]

    if rank:
        df = add_rank(df, compute_average=True)

    if fillna:
        df = df.fillna("")

    return df
652
+
653
+
654
# Precomputed flores_ind2eng leaderboard tables, one per evaluation mode.
FLORES_IND2ENG_ZERO_SHOT, FLORES_IND2ENG_FIVE_SHOT = (
    get_data_flores_ind2eng(eval_mode=mode) for mode in ("zero_shot", "five_shot")
)
656
+
657
+
658
+
659
+ # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
660
+ # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
661
+
662
+
663
def get_data_flores_vie2eng(eval_mode='zero_shot', fillna=True, rank=True):
    """Build the leaderboard table for the ``flores_vie2eng`` dataset.

    For every model in ``MODEL_LIST`` the per-run results stored under
    ``ALL_RESULTS[model][eval_mode]['flores_vie2eng']`` are reduced to the
    median of their ``bleu_score`` values.

    Args:
        eval_mode: Key selecting the evaluation setting inside
            ``ALL_RESULTS[model]``.
        fillna: When true, missing cells are replaced with empty strings.
        rank: When true, the frame is passed through
            ``add_rank(df, compute_average=True)`` before being returned.

    Returns:
        A ``pandas.DataFrame`` with one row per distinct ``"Model"`` value
        holding ``"Model"``, ``"Model Size (Params)"`` and ``"BLEU"``
        (plus whatever ``add_rank`` contributes when ``rank`` is true).
        ``"BLEU"`` is ``-1`` for models whose score could not be computed.
    """
    rows = []

    for model in MODEL_LIST:
        results_list = list(ALL_RESULTS[model][eval_mode]['flores_vie2eng'].values())

        try:
            bleu_score = median([results['bleu_score'] for results in results_list])
        except Exception:
            # Best effort: a model whose runs cannot be reduced (e.g. a run
            # without 'bleu_score') gets the -1 sentinel instead of breaking
            # the whole table. Unlike a bare ``except:``, KeyboardInterrupt
            # and SystemExit still propagate.
            print(results_list)
            bleu_score = -1

        rows.append({
            "Model Size (Params)": MODEL_TO_SIZE.get(model, ""),
            "Model": make_clickable_model(model, link=ALL_RESULTS[model]["model_link"]),
            "BLEU": bleu_score,
        })

    df = pd.DataFrame(rows)

    # Collapse duplicate "Model" entries, keeping the first non-null value
    # of every other column.
    df = df.groupby("Model", as_index=False).first()

    # Put the 'Model' column first.
    cols = list(df.columns)
    cols.insert(0, cols.pop(cols.index("Model")))
    df = df[cols]

    if rank:
        df = add_rank(df, compute_average=True)

    if fillna:
        df = df.fillna("")

    return df
707
+
708
+
709
# Precomputed flores_vie2eng leaderboard tables, one per evaluation mode.
FLORES_VIE2ENG_ZERO_SHOT, FLORES_VIE2ENG_FIVE_SHOT = (
    get_data_flores_vie2eng(eval_mode=mode) for mode in ("zero_shot", "five_shot")
)
711
+
712
+ # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
713
+ # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
714
+
715
+
716
def get_data_flores_zho2eng(eval_mode='zero_shot', fillna=True, rank=True):
    """Build the leaderboard table for the ``flores_zho2eng`` dataset.

    For every model in ``MODEL_LIST`` the per-run results stored under
    ``ALL_RESULTS[model][eval_mode]['flores_zho2eng']`` are reduced to the
    median of their ``bleu_score`` values.

    Args:
        eval_mode: Key selecting the evaluation setting inside
            ``ALL_RESULTS[model]``.
        fillna: When true, missing cells are replaced with empty strings.
        rank: When true, the frame is passed through
            ``add_rank(df, compute_average=True)`` before being returned.

    Returns:
        A ``pandas.DataFrame`` with one row per distinct ``"Model"`` value
        holding ``"Model"``, ``"Model Size (Params)"`` and ``"BLEU"``
        (plus whatever ``add_rank`` contributes when ``rank`` is true).
        ``"BLEU"`` is ``-1`` for models whose score could not be computed.
    """
    rows = []

    for model in MODEL_LIST:
        results_list = list(ALL_RESULTS[model][eval_mode]['flores_zho2eng'].values())

        try:
            bleu_score = median([results['bleu_score'] for results in results_list])
        except Exception:
            # Best effort: a model whose runs cannot be reduced (e.g. a run
            # without 'bleu_score') gets the -1 sentinel instead of breaking
            # the whole table. Unlike a bare ``except:``, KeyboardInterrupt
            # and SystemExit still propagate.
            print(results_list)
            bleu_score = -1

        rows.append({
            "Model Size (Params)": MODEL_TO_SIZE.get(model, ""),
            "Model": make_clickable_model(model, link=ALL_RESULTS[model]["model_link"]),
            "BLEU": bleu_score,
        })

    df = pd.DataFrame(rows)

    # Collapse duplicate "Model" entries, keeping the first non-null value
    # of every other column.
    df = df.groupby("Model", as_index=False).first()

    # Put the 'Model' column first.
    cols = list(df.columns)
    cols.insert(0, cols.pop(cols.index("Model")))
    df = df[cols]

    if rank:
        df = add_rank(df, compute_average=True)

    if fillna:
        df = df.fillna("")

    return df
760
+
761
+
762
# Precomputed flores_zho2eng leaderboard tables, one per evaluation mode.
FLORES_ZHO2ENG_ZERO_SHOT, FLORES_ZHO2ENG_FIVE_SHOT = (
    get_data_flores_zho2eng(eval_mode=mode) for mode in ("zero_shot", "five_shot")
)
764
+
765
+
766
+ # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
767
+ # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
768
+
769
+
770
def get_data_flores_zsm2eng(eval_mode='zero_shot', fillna=True, rank=True):
    """Build the leaderboard table for the ``flores_zsm2eng`` dataset.

    For every model in ``MODEL_LIST`` the per-run results stored under
    ``ALL_RESULTS[model][eval_mode]['flores_zsm2eng']`` are reduced to the
    median of their ``bleu_score`` values.

    Args:
        eval_mode: Key selecting the evaluation setting inside
            ``ALL_RESULTS[model]``.
        fillna: When true, missing cells are replaced with empty strings.
        rank: When true, the frame is passed through
            ``add_rank(df, compute_average=True)`` before being returned.

    Returns:
        A ``pandas.DataFrame`` with one row per distinct ``"Model"`` value
        holding ``"Model"``, ``"Model Size (Params)"`` and ``"BLEU"``
        (plus whatever ``add_rank`` contributes when ``rank`` is true).
        ``"BLEU"`` is ``-1`` for models whose score could not be computed.
    """
    rows = []

    for model in MODEL_LIST:
        results_list = list(ALL_RESULTS[model][eval_mode]['flores_zsm2eng'].values())

        try:
            bleu_score = median([results['bleu_score'] for results in results_list])
        except Exception:
            # Best effort: a model whose runs cannot be reduced (e.g. a run
            # without 'bleu_score') gets the -1 sentinel instead of breaking
            # the whole table. Unlike a bare ``except:``, KeyboardInterrupt
            # and SystemExit still propagate.
            print(results_list)
            bleu_score = -1

        rows.append({
            "Model Size (Params)": MODEL_TO_SIZE.get(model, ""),
            "Model": make_clickable_model(model, link=ALL_RESULTS[model]["model_link"]),
            "BLEU": bleu_score,
        })

    df = pd.DataFrame(rows)

    # Collapse duplicate "Model" entries, keeping the first non-null value
    # of every other column.
    df = df.groupby("Model", as_index=False).first()

    # Put the 'Model' column first.
    cols = list(df.columns)
    cols.insert(0, cols.pop(cols.index("Model")))
    df = df[cols]

    if rank:
        df = add_rank(df, compute_average=True)

    if fillna:
        df = df.fillna("")

    return df
814
+
815
+
816
# Precomputed flores_zsm2eng leaderboard tables, one per evaluation mode.
# These must come from get_data_flores_zsm2eng: calling the zho2eng loader
# here would make the zsm2eng tables a copy of the zho2eng results.
FLORES_ZSM2ENG_ZERO_SHOT = get_data_flores_zsm2eng(eval_mode="zero_shot")
FLORES_ZSM2ENG_FIVE_SHOT = get_data_flores_zsm2eng(eval_mode="five_shot")
818
+
819
  # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
820
  # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
821
 
 
1061
  )
1062
 
1063
 
1064
# dataset 7: sing2eng
with gr.TabItem("Singlish to English Translation"):
    # Header describing the leaderboard shown in this tab.
    with gr.Row():
        gr.Markdown(
            "\n"
            "**SING2ENG Leaderboard** 🔮\n"
            "\n"
            "- **Metric:** BLEU Avg.\n"
            "- **Languages:** English\n"
        )

    # One sub-tab per evaluation mode, each holding a single "Overall" table.
    with gr.TabItem("zero_shot"), gr.TabItem("Overall"), gr.Row():
        gr.components.Dataframe(
            SING2ENG_ZERO_SHOT,
            datatype=["number", "markdown"] + ["number"] * len(SING2ENG_ZERO_SHOT.columns),
            type="pandas",
        )

    with gr.TabItem("five_shot"), gr.TabItem("Overall"), gr.Row():
        gr.components.Dataframe(
            SING2ENG_FIVE_SHOT,
            datatype=["number", "markdown"] + ["number"] * len(SING2ENG_FIVE_SHOT.columns),
            type="pandas",
        )
1091
+
1092
 
1093
# dataset 8: flores_ind2eng
with gr.TabItem("FLORES Indonesian to English Translation"):
    # Header describing the leaderboard shown in this tab.
    with gr.Row():
        gr.Markdown(
            "\n"
            "**flores_ind2eng Leaderboard** 🔮\n"
            "\n"
            "- **Metric:** BLEU Avg.\n"
            "- **Languages:** English\n"
        )

    # One sub-tab per evaluation mode, each holding a single "Overall" table.
    with gr.TabItem("zero_shot"), gr.TabItem("Overall"), gr.Row():
        gr.components.Dataframe(
            FLORES_IND2ENG_ZERO_SHOT,
            datatype=["number", "markdown"] + ["number"] * len(FLORES_IND2ENG_ZERO_SHOT.columns),
            type="pandas",
        )

    with gr.TabItem("five_shot"), gr.TabItem("Overall"), gr.Row():
        gr.components.Dataframe(
            FLORES_IND2ENG_FIVE_SHOT,
            datatype=["number", "markdown"] + ["number"] * len(FLORES_IND2ENG_FIVE_SHOT.columns),
            type="pandas",
        )
1120
+
1121
+
1122
# dataset 9: flores_vie2eng
# The tab label is user-facing; it previously read "Vitenamese".
with gr.TabItem("FLORES Vietnamese to English Translation"):
    # Header describing the leaderboard shown in this tab.
    with gr.Row():
        gr.Markdown(
            "\n"
            "**flores_vie2eng Leaderboard** 🔮\n"
            "\n"
            "- **Metric:** BLEU Avg.\n"
            "- **Languages:** English\n"
        )

    # One sub-tab per evaluation mode, each holding a single "Overall" table.
    with gr.TabItem("zero_shot"), gr.TabItem("Overall"), gr.Row():
        gr.components.Dataframe(
            FLORES_VIE2ENG_ZERO_SHOT,
            datatype=["number", "markdown"] + ["number"] * len(FLORES_VIE2ENG_ZERO_SHOT.columns),
            type="pandas",
        )

    with gr.TabItem("five_shot"), gr.TabItem("Overall"), gr.Row():
        gr.components.Dataframe(
            FLORES_VIE2ENG_FIVE_SHOT,
            datatype=["number", "markdown"] + ["number"] * len(FLORES_VIE2ENG_FIVE_SHOT.columns),
            type="pandas",
        )
1149
+
1150
+
1151
+
1152
# dataset 10: flores_zho2eng
with gr.TabItem("FLORES Chinese to English Translation"):
    # Header describing the leaderboard shown in this tab.
    with gr.Row():
        gr.Markdown(
            "\n"
            "**flores_zho2eng Leaderboard** 🔮\n"
            "\n"
            "- **Metric:** BLEU Avg.\n"
            "- **Languages:** English\n"
        )

    # One sub-tab per evaluation mode, each holding a single "Overall" table.
    with gr.TabItem("zero_shot"), gr.TabItem("Overall"), gr.Row():
        gr.components.Dataframe(
            FLORES_ZHO2ENG_ZERO_SHOT,
            datatype=["number", "markdown"] + ["number"] * len(FLORES_ZHO2ENG_ZERO_SHOT.columns),
            type="pandas",
        )

    with gr.TabItem("five_shot"), gr.TabItem("Overall"), gr.Row():
        gr.components.Dataframe(
            FLORES_ZHO2ENG_FIVE_SHOT,
            datatype=["number", "markdown"] + ["number"] * len(FLORES_ZHO2ENG_FIVE_SHOT.columns),
            type="pandas",
        )
1179
+
1180
+
1181
+
1182
# dataset 11: flores_zsm2eng
with gr.TabItem("FLORES Malay to English Translation"):
    # Header describing the leaderboard shown in this tab.
    with gr.Row():
        gr.Markdown(
            "\n"
            "**flores_zsm2eng Leaderboard** 🔮\n"
            "\n"
            "- **Metric:** BLEU Avg.\n"
            "- **Languages:** English\n"
        )

    # One sub-tab per evaluation mode, each holding a single "Overall" table.
    with gr.TabItem("zero_shot"), gr.TabItem("Overall"), gr.Row():
        gr.components.Dataframe(
            FLORES_ZSM2ENG_ZERO_SHOT,
            datatype=["number", "markdown"] + ["number"] * len(FLORES_ZSM2ENG_ZERO_SHOT.columns),
            type="pandas",
        )

    with gr.TabItem("five_shot"), gr.TabItem("Overall"), gr.Row():
        gr.components.Dataframe(
            FLORES_ZSM2ENG_FIVE_SHOT,
            datatype=["number", "markdown"] + ["number"] * len(FLORES_ZSM2ENG_FIVE_SHOT.columns),
            type="pandas",
        )
1209
 
1210
  gr.Markdown(r"""
1211