rodrigomasini committed on
Commit
e7060c6
1 Parent(s): bca2d6e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -15
app.py CHANGED
@@ -744,15 +744,15 @@ def get_mteb_average():
744
  # Debugging:
745
  # DATA_OVERALL.to_csv("overall.csv")
746
 
747
- DATA_OVERALL.insert(1, f"Average", DATA_OVERALL[TASK_LIST_EN].mean(axis=1, skipna=False))
748
- DATA_OVERALL.insert(2, f"Classification Average", DATA_OVERALL[TASK_LIST_CLASSIFICATION].mean(axis=1, skipna=False))
749
- DATA_OVERALL.insert(3, f"Clustering Average", DATA_OVERALL[TASK_LIST_CLUSTERING].mean(axis=1, skipna=False))
750
- DATA_OVERALL.insert(4, f"Pair Classification Average", DATA_OVERALL[TASK_LIST_PAIR_CLASSIFICATION].mean(axis=1, skipna=False))
751
- DATA_OVERALL.insert(5, f"Reranking Average", DATA_OVERALL[TASK_LIST_RERANKING].mean(axis=1, skipna=False))
752
- DATA_OVERALL.insert(6, f"Retrieval Average", DATA_OVERALL[TASK_LIST_RETRIEVAL].mean(axis=1, skipna=False))
753
- DATA_OVERALL.insert(7, f"STS Average", DATA_OVERALL[TASK_LIST_STS].mean(axis=1, skipna=False))
754
- DATA_OVERALL.insert(8, f"Summarization Average", DATA_OVERALL[TASK_LIST_SUMMARIZATION].mean(axis=1, skipna=False))
755
- DATA_OVERALL.sort_values(f"Average", ascending=False, inplace=True)
756
  # Start ranking from 1
757
  DATA_OVERALL.insert(0, "Rank", list(range(1, len(DATA_OVERALL) + 1)))
758
 
@@ -769,7 +769,9 @@ def get_mteb_average():
769
  # Fill NaN after averaging
770
  DATA_OVERALL.fillna("", inplace=True)
771
 
772
- DATA_OVERALL = DATA_OVERALL[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "Max Tokens", f"Average", f"Classification Average", f"Clustering Average", f"Pair Classification Average", f"Reranking Average", f"Retrieval Average", f"STS Average", f"Summarization Average"]]
 
 
773
  DATA_OVERALL = DATA_OVERALL[DATA_OVERALL.iloc[:, 5:].ne("").any(axis=1)]
774
 
775
  return DATA_OVERALL
@@ -791,10 +793,6 @@ def remove_invalid_unicode(input_string):
791
  return ''.join(valid_chars)
792
  else:
793
  return input_string # Return non-string values as is
794
-
795
- for column in DATA_OVERALL.columns:
796
- if DATA_OVERALL[column].dtype == 'object':
797
- DATA_OVERALL[column] = DATA_OVERALL[column].apply(remove_invalid_unicode)
798
 
799
  from dataclasses import dataclass
800
 
@@ -810,7 +808,7 @@ DATA_OVERALL_COLUMN_TO_DATATYPE = [
810
  LeaderboardColumn("Max Tokens", "number"),
811
  LeaderboardColumn("Average", "number"),
812
  LeaderboardColumn("Classification Average", "number"),
813
- LeaderboardColumn("Classification Average", "number"),
814
  LeaderboardColumn("Pair Classification Average", "number"),
815
  LeaderboardColumn("Reranking Average", "number"),
816
  LeaderboardColumn("Retrieval Average", "number"),
 
744
  # Debugging:
745
  # DATA_OVERALL.to_csv("overall.csv")
746
 
747
+ DATA_OVERALL.insert(1, "Average", DATA_OVERALL[TASK_LIST_EN].mean(axis=1, skipna=False))
748
+ DATA_OVERALL.insert(2, "Classification Average", DATA_OVERALL[TASK_LIST_CLASSIFICATION].mean(axis=1, skipna=False))
749
+ DATA_OVERALL.insert(3, "Clustering Average", DATA_OVERALL[TASK_LIST_CLUSTERING].mean(axis=1, skipna=False))
750
+ DATA_OVERALL.insert(4, "Pair Classification Average", DATA_OVERALL[TASK_LIST_PAIR_CLASSIFICATION].mean(axis=1, skipna=False))
751
+ DATA_OVERALL.insert(5, "Reranking Average", DATA_OVERALL[TASK_LIST_RERANKING].mean(axis=1, skipna=False))
752
+ DATA_OVERALL.insert(6, "Retrieval Average", DATA_OVERALL[TASK_LIST_RETRIEVAL].mean(axis=1, skipna=False))
753
+ DATA_OVERALL.insert(7, "STS Average", DATA_OVERALL[TASK_LIST_STS].mean(axis=1, skipna=False))
754
+ DATA_OVERALL.insert(8, "Summarization Average", DATA_OVERALL[TASK_LIST_SUMMARIZATION].mean(axis=1, skipna=False))
755
+ DATA_OVERALL.sort_values("Average", ascending=False, inplace=True)
756
  # Start ranking from 1
757
  DATA_OVERALL.insert(0, "Rank", list(range(1, len(DATA_OVERALL) + 1)))
758
 
 
769
  # Fill NaN after averaging
770
  DATA_OVERALL.fillna("", inplace=True)
771
 
772
+ DATA_OVERALL = DATA_OVERALL[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "Max Tokens", "Average",
773
+ "Classification Average", "Clustering Average", "Pair Classification Average", "Reranking Average", "Retrieval Average",
774
+ "STS Average", "Summarization Average"]]
775
  DATA_OVERALL = DATA_OVERALL[DATA_OVERALL.iloc[:, 5:].ne("").any(axis=1)]
776
 
777
  return DATA_OVERALL
 
793
  return ''.join(valid_chars)
794
  else:
795
  return input_string # Return non-string values as is
 
 
 
 
796
 
797
  from dataclasses import dataclass
798
 
 
808
  LeaderboardColumn("Max Tokens", "number"),
809
  LeaderboardColumn("Average", "number"),
810
  LeaderboardColumn("Classification Average", "number"),
811
+ LeaderboardColumn("Clustering Average", "number"),
812
  LeaderboardColumn("Pair Classification Average", "number"),
813
  LeaderboardColumn("Reranking Average", "number"),
814
  LeaderboardColumn("Retrieval Average", "number"),