Commit 17e0108 (1 parent: a51beac): Fix dataframe dtypes for proper sorting
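The motivation behind the change: get_mteb_data previously cast every column to str before returning (the old comment reads "Cast to str as Gradio does not accept floats"), so clicking a column header in the leaderboard sorted scores lexicographically rather than numerically. A minimal sketch of the problem, using made-up numbers and not taken from the commit:

    import pandas as pd

    # Toy stand-in for a leaderboard score column (hypothetical data).
    scores = pd.DataFrame({"Model": ["model-a", "model-b"], "Average": [9.5, 10.2]})

    # With everything cast to str, as the old code did, sorting is lexicographic,
    # so "9.5" outranks "10.2" in descending order.
    print(scores.astype(str).sort_values("Average", ascending=False)["Average"].tolist())
    # -> ['9.5', '10.2']

    # With numeric dtypes kept, as the new code does, sorting is numeric.
    print(scores.sort_values("Average", ascending=False)["Average"].tolist())
    # -> [10.2, 9.5]

The commit therefore removes the cast_to_str path from get_mteb_data and instead declares the numeric columns to Gradio through each Dataframe's datatype argument, as the diff below shows.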
app.py CHANGED
@@ -206,7 +206,7 @@ for model in EXTERNAL_MODELS:
             EXTERNAL_MODEL_RESULTS[model][task][metric].append({**base_dict, **ds_dict})
 
 
-def get_mteb_data(tasks=["Clustering"], langs=[], cast_to_str=True, task_to_metric=TASK_TO_METRIC):
+def get_mteb_data(tasks=["Clustering"], langs=[], task_to_metric=TASK_TO_METRIC):
     api = HfApi()
     models = api.list_models(filter="mteb")
     # Initialize list to models that we cannot fetch metadata from
@@ -255,8 +255,6 @@ def get_mteb_data(tasks=["Clustering"], langs=[], cast_to_str=True, task_to_metr
     cols.insert(0, cols.pop(cols.index("Model")))
     df = df[cols]
     df.fillna("", inplace=True)
-    if cast_to_str:
-        return df.astype(str) # Cast to str as Gradio does not accept floats
     return df
 
 def get_mteb_average():
@@ -272,7 +270,6 @@ def get_mteb_average():
         "Summarization",
         ],
         langs=["en", "en-en"],
-        cast_to_str=False
     )
     # Approximation (Missing Bitext Mining & including some nans)
     NUM_SCORES = DATA_OVERALL.shape[0] * DATA_OVERALL.shape[1]
@@ -292,7 +289,7 @@ def get_mteb_average():
     # Start ranking from 1
     DATA_OVERALL.insert(0, "Rank", list(range(1, len(DATA_OVERALL) + 1)))
 
-    DATA_OVERALL = DATA_OVERALL.round(2)
+    DATA_OVERALL = DATA_OVERALL.round(2)
 
     DATA_CLASSIFICATION_EN = DATA_OVERALL[["Model"] + TASK_LIST_CLASSIFICATION]
     DATA_CLUSTERING = DATA_OVERALL[["Model"] + TASK_LIST_CLUSTERING]
@@ -331,7 +328,7 @@ with block:
             with gr.Row():
                 data_overall = gr.components.Dataframe(
                     DATA_OVERALL,
-                    datatype=["markdown"] * len(DATA_OVERALL.columns),
+                    datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL.columns),
                     type="pandas",
                     wrap=True,
                 )
@@ -348,7 +345,7 @@ with block:
                 """)
             with gr.Row():
                 data_bitext_mining = gr.components.Dataframe(
-                    datatype=["markdown"] * 500, # hack when we don't know how many columns
+                    datatype=["markdown"] + ["number"] * 500, # hack when we don't know how many columns
                     type="pandas",
                 )
             with gr.Row():
@@ -371,7 +368,7 @@ with block:
             with gr.Row():
                 data_classification_en = gr.components.Dataframe(
                     DATA_CLASSIFICATION_EN,
-                    datatype=["markdown"] * len(DATA_CLASSIFICATION_EN.columns),
+                    datatype=["markdown"] + ["number"] * len(DATA_CLASSIFICATION_EN.columns),
                     type="pandas",
                 )
             with gr.Row():
@@ -396,7 +393,7 @@ with block:
                 """)
             with gr.Row():
                 data_classification = gr.components.Dataframe(
-                    datatype=["markdown"] * 200, # hack when we don't know how many columns
+                    datatype=["markdown"] + ["number"] * 200, # hack when we don't know how many columns
                     type="pandas",
                 )
             with gr.Row():
@@ -418,7 +415,7 @@ with block:
             with gr.Row():
                 data_clustering = gr.components.Dataframe(
                     DATA_CLUSTERING,
-                    datatype=["markdown"] * len(DATA_CLUSTERING.columns),
+                    datatype=["markdown"] + ["number"] * len(DATA_CLUSTERING.columns),
                     type="pandas",
                 )
             with gr.Row():
@@ -440,7 +437,7 @@ with block:
             with gr.Row():
                 data_pair_classification = gr.components.Dataframe(
                     DATA_PAIR_CLASSIFICATION,
-                    datatype=["markdown"] * len(DATA_PAIR_CLASSIFICATION.columns),
+                    datatype=["markdown"] + ["number"] * len(DATA_PAIR_CLASSIFICATION.columns),
                     type="pandas",
                 )
             with gr.Row():
@@ -462,7 +459,8 @@ with block:
             with gr.Row():
                 data_retrieval = gr.components.Dataframe(
                     DATA_RETRIEVAL,
-                    datatype=["markdown"] * len(DATA_RETRIEVAL.columns),
+                    # Add support for more columns than existing as a buffer for CQADupstack & other Retrieval tasks (e.g. MSMARCOv2)
+                    datatype=["markdown"] + ["number"] * len(DATA_RETRIEVAL.columns) * 2,
                     type="pandas",
                 )
             with gr.Row():
@@ -482,7 +480,7 @@ with block:
             with gr.Row():
                 data_reranking = gr.components.Dataframe(
                     DATA_RERANKING,
-                    datatype=["markdown"] * len(DATA_RERANKING.columns),
+                    datatype=["markdown"] + ["number"] * len(DATA_RERANKING.columns),
                     type="pandas",
                 )
             with gr.Row():
@@ -504,7 +502,7 @@ with block:
            with gr.Row():
                 data_sts_en = gr.components.Dataframe(
                     DATA_STS_EN,
-                    datatype=["markdown"] * len(DATA_STS_EN.columns),
+                    datatype=["markdown"] + ["number"] * len(DATA_STS_EN.columns),
                     type="pandas",
                 )
             with gr.Row():
@@ -526,7 +524,7 @@ with block:
                 """)
             with gr.Row():
                 data_sts = gr.components.Dataframe(
-                    datatype=["markdown"] * 100, # hack when we don't know how many columns
+                    datatype=["markdown"] + ["number"] * 100, # hack when we don't know how many columns
                     type="pandas",
                 )
             with gr.Row():
@@ -543,8 +541,8 @@ with block:
                 """)
             with gr.Row():
                 data_summarization = gr.components.Dataframe(
-                    DATA_SUMMARIZATION,
-                    datatype="markdown",
+                    DATA_SUMMARIZATION,
+                    datatype=["markdown"] + ["number"] * 2,
                     type="pandas",
                 )
             with gr.Row():
@@ -564,6 +562,7 @@ with block:
     block.load(get_mteb_data, inputs=[task_classification_en, lang_classification_en], outputs=data_classification_en)
     block.load(get_mteb_data, inputs=[task_classification], outputs=data_classification)
     block.load(get_mteb_data, inputs=[task_clustering], outputs=data_clustering)
+    block.load(get_mteb_data, inputs=[task_pair_classification], outputs=data_pair_classification)
     block.load(get_mteb_data, inputs=[task_retrieval], outputs=data_retrieval)
     block.load(get_mteb_data, inputs=[task_reranking], outputs=data_reranking)
     block.load(get_mteb_data, inputs=[task_sts_en, lang_sts_en], outputs=data_sts_en)
@@ -577,6 +576,7 @@ block.launch()
 # Could check if tasks are valid (Currently users could just invent new tasks - similar for languages)
 # Could make it load in the background without the Gradio logo closer to the Deep RL space
 # Could add graphs / other visual content
+# Could add verification marks
 
 # Sources:
 # https://huggingface.co/spaces/gradio/leaderboard
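For reference, a standalone sketch of the datatype pattern the commit applies to each table: the model column stays "markdown" (it holds a link), while score columns are declared as "number" so Gradio keeps the float dtype and sorts them numerically. The frame below is made up, and the snippet assumes the Gradio 3.x gr.components.Dataframe signature already used in app.py; several tables in the commit also pass deliberately over-long datatype lists (the "* 500" / "* 200" hacks and the "* 2" retrieval buffer) when the final column count is not known up front.

    import gradio as gr
    import pandas as pd

    # Hypothetical miniature leaderboard: one markdown column, two numeric score columns.
    df = pd.DataFrame({
        "Model": ["[model-a](https://hf.co/model-a)", "[model-b](https://hf.co/model-b)"],
        "STS12": [72.3, 69.8],
        "STS13": [80.1, 78.4],
    })

    with gr.Blocks() as demo:
        gr.components.Dataframe(
            df,
            # "markdown" for the Model column, "number" for everything else,
            # mirroring the datatype lists introduced in this commit.
            datatype=["markdown"] + ["number"] * (len(df.columns) - 1),
            type="pandas",
            wrap=True,
        )

    demo.launch()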