Spaces:
Running
Running
Commit
·
ac3fdf5
1
Parent(s):
842d3bc
Rename BTM
Browse files
app.py
CHANGED
|
@@ -54,7 +54,7 @@ TASK_LIST_CLASSIFICATION_NB = [
|
|
| 54 |
"NorwegianParliament",
|
| 55 |
"MassiveIntentClassification (nb)",
|
| 56 |
"MassiveScenarioClassification (nb)",
|
| 57 |
-
"ScalaNbClassification
|
| 58 |
]
|
| 59 |
|
| 60 |
TASK_LIST_CLASSIFICATION_SV = [
|
|
@@ -62,7 +62,6 @@ TASK_LIST_CLASSIFICATION_SV = [
|
|
| 62 |
"MassiveIntentClassification (sv)",
|
| 63 |
"MassiveScenarioClassification (sv)",
|
| 64 |
"NordicLangClassification",
|
| 65 |
-
"ScalaNbClassification",
|
| 66 |
"ScalaSvClassification",
|
| 67 |
"SweRecClassification",
|
| 68 |
]
|
|
@@ -587,6 +586,15 @@ def get_dim_seq_size(model):
|
|
| 587 |
size = round(size["metadata"]["total_size"] / 1e9, 2)
|
| 588 |
return dim, seq, size
|
| 589 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 590 |
def add_rank(df):
|
| 591 |
cols_to_rank = [col for col in df.columns if col not in ["Model", "Model Size (GB)", "Embedding Dimensions", "Sequence Length"]]
|
| 592 |
if len(cols_to_rank) == 1:
|
|
@@ -659,8 +667,6 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
|
|
| 659 |
df = pd.DataFrame(df_list)
|
| 660 |
# If there are any models that are the same, merge them
|
| 661 |
# E.g. if out["Model"] has the same value in two places, merge & take whichever one is not NaN else just take the first one
|
| 662 |
-
# Save to csv
|
| 663 |
-
df.to_csv("mteb.csv", index=False)
|
| 664 |
df = df.groupby("Model", as_index=False).first()
|
| 665 |
# Put 'Model' column first
|
| 666 |
cols = sorted(list(df.columns))
|
|
@@ -780,7 +786,7 @@ with block:
|
|
| 780 |
with gr.TabItem("English-X"):
|
| 781 |
with gr.Row():
|
| 782 |
gr.Markdown("""
|
| 783 |
-
**Bitext Mining Leaderboard
|
| 784 |
|
| 785 |
- **Metric:** [F1](https://huggingface.co/spaces/evaluate-metric/f1)
|
| 786 |
- **Languages:** 117 (Pairs of: English & other language)
|
|
@@ -801,13 +807,13 @@ with block:
|
|
| 801 |
inputs=[task_bitext_mining, lang_bitext_mining_other, datasets_bitext_mining_other],
|
| 802 |
outputs=data_bitext_mining,
|
| 803 |
)
|
| 804 |
-
with gr.TabItem("
|
| 805 |
with gr.Row():
|
| 806 |
gr.Markdown("""
|
| 807 |
-
**Bitext Mining
|
| 808 |
|
| 809 |
- **Metric:** [F1](https://huggingface.co/spaces/evaluate-metric/f1)
|
| 810 |
-
- **Languages:**
|
| 811 |
- **Credits:** [Kenneth Enevoldsen](https://github.com/KennethEnevoldsen)
|
| 812 |
""")
|
| 813 |
with gr.Row():
|
|
|
|
| 54 |
"NorwegianParliament",
|
| 55 |
"MassiveIntentClassification (nb)",
|
| 56 |
"MassiveScenarioClassification (nb)",
|
| 57 |
+
"ScalaNbClassification",
|
| 58 |
]
|
| 59 |
|
| 60 |
TASK_LIST_CLASSIFICATION_SV = [
|
|
|
|
| 62 |
"MassiveIntentClassification (sv)",
|
| 63 |
"MassiveScenarioClassification (sv)",
|
| 64 |
"NordicLangClassification",
|
|
|
|
| 65 |
"ScalaSvClassification",
|
| 66 |
"SweRecClassification",
|
| 67 |
]
|
|
|
|
| 586 |
size = round(size["metadata"]["total_size"] / 1e9, 2)
|
| 587 |
return dim, seq, size
|
| 588 |
|
| 589 |
+
def make_datasets_clickable(df):
    """Rename the ``BornholmBitextMining`` column to an HTML link to its dataset.

    If ``df`` has a column named ``"BornholmBitextMining"``, a renamed copy is
    returned whose header for that column is an ``<a>`` tag pointing at the
    dataset page on the Hugging Face Hub. Otherwise ``df`` is returned
    unchanged. The input frame is never modified in place.

    Args:
        df: DataFrame whose column labels may include ``"BornholmBitextMining"``.

    Returns:
        The DataFrame with the column header replaced by a link, or ``df``
        itself when the column is absent.

    NOTE(review): this function was previously marked "Does not work". One
    concrete cause was that ``{link}`` was never interpolated, leaving a
    literal ``{link}`` in the href; that is fixed here. Whether the consuming
    table widget renders HTML inside column headers is not visible from this
    block and should be confirmed.
    """
    column = "BornholmBitextMining"
    if column not in df.columns:
        return df

    link = "https://huggingface.co/datasets/strombergnlp/bornholmsk_parallel"
    # The replacement must be an f-string so the URL is substituted into href.
    header = (
        f'<a target="_blank" style="text-decoration: underline" '
        f'href="{link}">{column}</a>'
    )
    return df.rename(columns={column: header})
|
| 596 |
+
|
| 597 |
+
|
| 598 |
def add_rank(df):
|
| 599 |
cols_to_rank = [col for col in df.columns if col not in ["Model", "Model Size (GB)", "Embedding Dimensions", "Sequence Length"]]
|
| 600 |
if len(cols_to_rank) == 1:
|
|
|
|
| 667 |
df = pd.DataFrame(df_list)
|
| 668 |
# If there are any models that are the same, merge them
|
| 669 |
# E.g. if out["Model"] has the same value in two places, merge & take whichever one is not NaN else just take the first one
|
|
|
|
|
|
|
| 670 |
df = df.groupby("Model", as_index=False).first()
|
| 671 |
# Put 'Model' column first
|
| 672 |
cols = sorted(list(df.columns))
|
|
|
|
| 786 |
with gr.TabItem("English-X"):
|
| 787 |
with gr.Row():
|
| 788 |
gr.Markdown("""
|
| 789 |
+
**Bitext Mining Leaderboard 🎌**
|
| 790 |
|
| 791 |
- **Metric:** [F1](https://huggingface.co/spaces/evaluate-metric/f1)
|
| 792 |
- **Languages:** 117 (Pairs of: English & other language)
|
|
|
|
| 807 |
inputs=[task_bitext_mining, lang_bitext_mining_other, datasets_bitext_mining_other],
|
| 808 |
outputs=data_bitext_mining,
|
| 809 |
)
|
| 810 |
+
with gr.TabItem("Danish"):
|
| 811 |
with gr.Row():
|
| 812 |
gr.Markdown("""
|
| 813 |
+
**Bitext Mining Danish Leaderboard 🇩🇰🎌**
|
| 814 |
|
| 815 |
- **Metric:** [F1](https://huggingface.co/spaces/evaluate-metric/f1)
|
| 816 |
+
- **Languages:** Danish & Bornholmsk (Danish Dialect)
|
| 817 |
- **Credits:** [Kenneth Enevoldsen](https://github.com/KennethEnevoldsen)
|
| 818 |
""")
|
| 819 |
with gr.Row():
|