Muennighoff committed on
Commit
4b8d901
1 Parent(s): 3ae8f23
Files changed (1) hide show
  1. app.py +58 -20
app.py CHANGED
@@ -121,6 +121,20 @@ TASK_LIST_RETRIEVAL = [
121
  "TRECCOVID",
122
  ]
123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  TASK_LIST_RETRIEVAL_NORM = TASK_LIST_RETRIEVAL + [
125
  "CQADupstackAndroidRetrieval",
126
  "CQADupstackEnglishRetrieval",
@@ -735,6 +749,7 @@ DATA_CLASSIFICATION_NB = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIF
735
  DATA_CLASSIFICATION_SV = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_SV)
736
  DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_OTHER)
737
  DATA_CLUSTERING_GERMAN = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)
 
738
  DATA_STS = get_mteb_data(["STS"])
739
 
740
  # Exact, add all non-nan integer values for every dataset
@@ -1072,26 +1087,49 @@ with block:
1072
  get_mteb_data, inputs=[task_reranking], outputs=data_reranking
1073
  )
1074
  with gr.TabItem("Retrieval"):
1075
- with gr.Row():
1076
- gr.Markdown("""
1077
- **Retrieval Leaderboard 🔎**
1078
-
1079
- - **Metric:** Normalized Discounted Cumulative Gain @ k (ndcg_at_10)
1080
- - **Languages:** English
1081
- """)
1082
- with gr.Row():
1083
- data_retrieval = gr.components.Dataframe(
1084
- DATA_RETRIEVAL,
1085
- # Add support for more columns than existing as a buffer for CQADupstack & other Retrieval tasks (e.g. MSMARCOv2)
1086
- datatype=["number", "markdown"] + ["number"] * len(DATA_RETRIEVAL.columns) * 2,
1087
- type="pandas",
1088
- )
1089
- with gr.Row():
1090
- data_run = gr.Button("Refresh")
1091
- task_retrieval = gr.Variable(value=["Retrieval"])
1092
- data_run.click(
1093
- get_mteb_data, inputs=[task_retrieval], outputs=data_retrieval
1094
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1095
  with gr.TabItem("STS"):
1096
  with gr.TabItem("English"):
1097
  with gr.Row():
 
121
  "TRECCOVID",
122
  ]
123
 
124
+ TASK_LIST_RETRIEVAL_PL = [
125
+ "ArguAna-PL",
126
+ "DBPedia-PL",
127
+ "FiQA2018-PL",
128
+ "HotpotQA-PL",
129
+ "MSMARCO-PL",
130
+ "NFCorpus-PL",
131
+ "NQ-PL",
132
+ "Quora-PL",
133
+ "SCIDOCS-PL",
134
+ "SciFact-PL",
135
+ "TRECCOVID-PL",
136
+ ]
137
+
138
  TASK_LIST_RETRIEVAL_NORM = TASK_LIST_RETRIEVAL + [
139
  "CQADupstackAndroidRetrieval",
140
  "CQADupstackEnglishRetrieval",
 
749
  DATA_CLASSIFICATION_SV = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_SV)
750
  DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_OTHER)
751
  DATA_CLUSTERING_GERMAN = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)
752
+ DATA_RETRIEVAL_PL = get_mteb_data(["Retrieval"], [], TASK_LIST_RETRIEVAL_PL)
753
  DATA_STS = get_mteb_data(["STS"])
754
 
755
  # Exact, add all non-nan integer values for every dataset
 
1087
  get_mteb_data, inputs=[task_reranking], outputs=data_reranking
1088
  )
1089
  with gr.TabItem("Retrieval"):
1090
+ with gr.TabItem("English"):
1091
+ with gr.Row():
1092
+ gr.Markdown("""
1093
+ **Retrieval Leaderboard 🔎**
1094
+
1095
+ - **Metric:** Normalized Discounted Cumulative Gain @ k (ndcg_at_10)
1096
+ - **Languages:** English
1097
+ """)
1098
+ with gr.Row():
1099
+ data_retrieval = gr.components.Dataframe(
1100
+ DATA_RETRIEVAL,
1101
+ # Add support for more columns than existing as a buffer for CQADupstack & other Retrieval tasks (e.g. MSMARCOv2)
1102
+ datatype=["number", "markdown"] + ["number"] * len(DATA_RETRIEVAL.columns) * 2,
1103
+ type="pandas",
1104
+ )
1105
+ with gr.Row():
1106
+ data_run = gr.Button("Refresh")
1107
+ task_retrieval = gr.Variable(value=["Retrieval"])
1108
+ data_run.click(
1109
+ get_mteb_data, inputs=[task_retrieval], outputs=data_retrieval
1110
+ )
1111
+ with gr.TabItem("Polish"):
1112
+ with gr.Row():
1113
+ gr.Markdown("""
1114
+ **Retrieval Leaderboard 🇵🇱**
1115
+
1116
+ - **Metric:** Normalized Discounted Cumulative Gain @ k (ndcg_at_10)
1117
+ - **Languages:** Polish
1118
+ - **Credits:** [Konrad Wojtasik](https://github.com/kwojtasi) & [BEIR-PL](https://arxiv.org/abs/2305.19840)
1119
+ """)
1120
+ with gr.Row():
1121
+ data_retrieval_pl = gr.components.Dataframe(
1122
+ DATA_RETRIEVAL_PL,
1123
+ # Add support for more columns than existing as a buffer for CQADupstack & other Retrieval tasks (e.g. MSMARCOv2)
1124
+ datatype=["number", "markdown"] + ["number"] * len(DATA_RETRIEVAL_PL.columns) * 2,
1125
+ type="pandas",
1126
+ )
1127
+ with gr.Row():
1128
+ data_run = gr.Button("Refresh")
1129
+ task_retrieval_pl = gr.Variable(value=["Retrieval"])
1130
+ data_run.click(
1131
+ get_mteb_data, inputs=[task_retrieval_pl], outputs=data_retrieval_pl
1132
+ )
1133
  with gr.TabItem("STS"):
1134
  with gr.TabItem("English"):
1135
  with gr.Row():