Kang Suhyun suhyun.kang committed on
Commit
65566f3
1 Parent(s): 519572d

[#9] Add language filter options to translation leaderboard (#51)

Browse files

* [#9] Add language filter options to translation leaderboard

This change adds the ability to filter the translation leaderboard by language.

The user can select a source language and a target language, and the leaderboard will display the scores for that language pair.

Optimization is not considered in this change.

Screenshot: https://screen.yanolja.in/avEVnBkaotnqunIh.png

* Remove Apply button

---------

Co-authored-by: suhyun.kang <suhyun.kang@yanolja.group>

Files changed (2) hide show
  1. app.py +1 -4
  2. leaderboard.py +61 -13
app.py CHANGED
@@ -9,13 +9,10 @@ import gradio as gr
9
 
10
  from leaderboard import build_leaderboard
11
  from leaderboard import db
 
12
  import response
13
  from response import get_responses
14
 
15
- SUPPORTED_TRANSLATION_LANGUAGES = [
16
- "Korean", "English", "Chinese", "Japanese", "Spanish", "French"
17
- ]
18
-
19
 
20
  class VoteOptions(enum.Enum):
21
  MODEL_A = "Model A is better"
 
9
 
10
  from leaderboard import build_leaderboard
11
  from leaderboard import db
12
+ from leaderboard import SUPPORTED_TRANSLATION_LANGUAGES
13
  import response
14
  from response import get_responses
15
 
 
 
 
 
16
 
17
  class VoteOptions(enum.Enum):
18
  MODEL_A = "Model A is better"
leaderboard.py CHANGED
@@ -9,6 +9,7 @@ import math
9
  import firebase_admin
10
  from firebase_admin import credentials
11
  from firebase_admin import firestore
 
12
  import gradio as gr
13
  import pandas as pd
14
 
@@ -18,6 +19,10 @@ from credentials import get_credentials_json
18
  firebase_admin.initialize_app(credentials.Certificate(get_credentials_json()))
19
  db = firestore.client()
20
 
 
 
 
 
21
 
22
  class LeaderboardTab(enum.Enum):
23
  SUMMARIZATION = "Summarization"
@@ -44,16 +49,26 @@ def compute_elo(battles, k=4, scale=400, base=10, initial_rating=1000):
44
  return rating
45
 
46
 
47
- def get_docs(tab):
48
  if tab == LeaderboardTab.SUMMARIZATION:
49
  return db.collection("arena-summarizations").order_by("timestamp").stream()
50
 
51
  if tab == LeaderboardTab.TRANSLATION:
52
- return db.collection("arena-translations").order_by("timestamp").stream()
 
 
 
 
 
 
 
 
53
 
 
54
 
55
- def load_elo_ratings(tab):
56
- docs = get_docs(tab)
 
57
 
58
  battles = []
59
  for doc in docs:
@@ -75,16 +90,27 @@ def load_elo_ratings(tab):
75
  for i, (model, rating) in enumerate(sorted_ratings)]
76
 
77
 
78
- def load_summarization_elo_ratings():
79
- return load_elo_ratings(LeaderboardTab.SUMMARIZATION)
80
 
 
 
 
 
81
 
82
- def load_translation_elo_ratings():
83
- return load_elo_ratings(LeaderboardTab.TRANSLATION)
 
 
 
 
 
84
 
85
 
86
- LEADERBOARD_UPDATE_INTERVAL = 600 # 10 minutes
87
- LEADERBOARD_INFO = "The leaderboard is updated every 10 minutes."
 
 
88
 
89
 
90
  def build_leaderboard():
@@ -92,16 +118,38 @@ def build_leaderboard():
92
  with gr.Tab(LeaderboardTab.SUMMARIZATION.value):
93
  gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
94
  datatype=["number", "str", "number"],
95
- value=load_summarization_elo_ratings,
96
  every=LEADERBOARD_UPDATE_INTERVAL,
97
  elem_classes="leaderboard")
98
  gr.Markdown(LEADERBOARD_INFO)
99
 
100
- # TODO(#9): Add language filter options.
101
  with gr.Tab(LeaderboardTab.TRANSLATION.value):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
103
  datatype=["number", "str", "number"],
104
- value=load_translation_elo_ratings,
105
  every=LEADERBOARD_UPDATE_INTERVAL,
106
  elem_classes="leaderboard")
107
  gr.Markdown(LEADERBOARD_INFO)
 
9
  import firebase_admin
10
  from firebase_admin import credentials
11
  from firebase_admin import firestore
12
+ from google.cloud.firestore_v1 import base_query
13
  import gradio as gr
14
  import pandas as pd
15
 
 
19
  firebase_admin.initialize_app(credentials.Certificate(get_credentials_json()))
20
  db = firestore.client()
21
 
22
+ SUPPORTED_TRANSLATION_LANGUAGES = [
23
+ "Korean", "English", "Chinese", "Japanese", "Spanish", "French"
24
+ ]
25
+
26
 
27
  class LeaderboardTab(enum.Enum):
28
  SUMMARIZATION = "Summarization"
 
49
  return rating
50
 
51
 
52
+ def get_docs(tab: str, source_lang: str = None, target_lang: str = None):
53
  if tab == LeaderboardTab.SUMMARIZATION:
54
  return db.collection("arena-summarizations").order_by("timestamp").stream()
55
 
56
  if tab == LeaderboardTab.TRANSLATION:
57
+ collection = db.collection("arena-translations").order_by("timestamp")
58
+
59
+ if source_lang:
60
+ collection = collection.where(filter=base_query.FieldFilter(
61
+ "source_language", "==", source_lang.lower()))
62
+
63
+ if target_lang:
64
+ collection = collection.where(filter=base_query.FieldFilter(
65
+ "target_language", "==", target_lang.lower()))
66
 
67
+ return collection.stream()
68
 
69
+
70
+ def load_elo_ratings(tab, source_lang: str = None, target_lang: str = None):
71
+ docs = get_docs(tab, source_lang, target_lang)
72
 
73
  battles = []
74
  for doc in docs:
 
90
  for i, (model, rating) in enumerate(sorted_ratings)]
91
 
92
 
93
+ LEADERBOARD_UPDATE_INTERVAL = 600 # 10 minutes
94
+ LEADERBOARD_INFO = "The leaderboard is updated every 10 minutes."
95
 
96
+ DEFAULT_FILTER_OPTIONS = {
97
+ "source_language": "English",
98
+ "target_language": "Spanish"
99
+ }
100
 
101
+ filtered_dataframe = gr.DataFrame(
102
+ headers=["Rank", "Model", "Elo rating"],
103
+ datatype=["number", "str", "number"],
104
+ value=lambda: load_elo_ratings(
105
+ LeaderboardTab.TRANSLATION, DEFAULT_FILTER_OPTIONS[
106
+ "source_language"], DEFAULT_FILTER_OPTIONS["target_language"]),
107
+ elem_classes="leaderboard")
108
 
109
 
110
+ def update_filtered_leaderboard(source_lang, target_lang):
111
+ new_value = load_elo_ratings(LeaderboardTab.TRANSLATION, source_lang,
112
+ target_lang)
113
+ return gr.update(value=new_value)
114
 
115
 
116
  def build_leaderboard():
 
118
  with gr.Tab(LeaderboardTab.SUMMARIZATION.value):
119
  gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
120
  datatype=["number", "str", "number"],
121
+ value=lambda: load_elo_ratings(LeaderboardTab.SUMMARIZATION),
122
  every=LEADERBOARD_UPDATE_INTERVAL,
123
  elem_classes="leaderboard")
124
  gr.Markdown(LEADERBOARD_INFO)
125
 
 
126
  with gr.Tab(LeaderboardTab.TRANSLATION.value):
127
+ with gr.Accordion("Filter", open=False):
128
+ with gr.Row():
129
+ source_language = gr.Dropdown(
130
+ choices=SUPPORTED_TRANSLATION_LANGUAGES,
131
+ label="Source language",
132
+ value=DEFAULT_FILTER_OPTIONS["source_language"],
133
+ interactive=True)
134
+ target_language = gr.Dropdown(
135
+ choices=SUPPORTED_TRANSLATION_LANGUAGES,
136
+ label="Target language",
137
+ value=DEFAULT_FILTER_OPTIONS["target_language"],
138
+ interactive=True)
139
+
140
+ source_language.change(fn=update_filtered_leaderboard,
141
+ inputs=[source_language, target_language],
142
+ outputs=filtered_dataframe)
143
+ target_language.change(fn=update_filtered_leaderboard,
144
+ inputs=[source_language, target_language],
145
+ outputs=filtered_dataframe)
146
+
147
+ with gr.Row():
148
+ filtered_dataframe.render()
149
+
150
  gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
151
  datatype=["number", "str", "number"],
152
+ value=lambda: load_elo_ratings(LeaderboardTab.TRANSLATION),
153
  every=LEADERBOARD_UPDATE_INTERVAL,
154
  elem_classes="leaderboard")
155
  gr.Markdown(LEADERBOARD_INFO)