nxphi47 committed on
Commit
87a0436
1 Parent(s): b6e0d00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -62
app.py CHANGED
@@ -16,32 +16,6 @@ import plotly.graph_objects as go
16
  CATEGORIES = ["task-solving", "math-reasoning", "general-instruction", "natural-question", "safety"]
17
  LANGS = ['en', 'vi', 'th', 'id', 'km', 'lo', 'ms', 'my', 'tl']
18
 
19
- # benchmark_name = "sea_bench_all"
20
-
21
- # with open(f"data/{benchmark_name}/question.jsonl", 'r') as f:
22
- # questions = [
23
- # json.loads(x)
24
- # for x in f
25
- # ]
26
- # questions = {
27
- # q['question_id']: q
28
- # for q in questions
29
- # }
30
-
31
-
32
- # def get_model_df():
33
- # cnt = 0
34
- # q2result = []
35
- # fin = open(f"data/{benchmark_name}/model_judgment/gpt-4_single.jsonl", "r")
36
- # for line in fin:
37
- # obj = json.loads(line)
38
- # # obj["category"] = CATEGORIES[(obj["question_id"]-81)//10]
39
- # obj["category"] = questions[obj['question_id']]['category']
40
- # obj["lang"] = questions[obj['question_id']]['lang']
41
- # q2result.append(obj)
42
- # df = pd.DataFrame(q2result)
43
- # return df
44
-
45
 
46
  force_download = bool(int(os.environ.get("force_download", "1")))
47
  HF_TOKEN = str(os.environ.get("HF_TOKEN", ""))
@@ -50,6 +24,97 @@ PERFORMANCE_FILENAME = str(os.environ.get("PERFORMANCE_FILENAME", "gpt4_single_j
50
 
51
  MODEL_DFRAME = None
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  def get_model_df():
54
  global MODEL_DFRAME
55
  if isinstance(MODEL_DFRAME, pd.DataFrame):
@@ -121,43 +186,9 @@ def polar_subplot(fig, dframe, model_names, category_label, category_names, row,
121
  )
122
  fig.add_trace(polar, row, col)
123
 
124
- rename_map = {
125
- # "seallm13b10L4k_a_sft4xdpo_5a": "SeaLLM-13b-10L",
126
- "seallm13b10L6k_a_5a1R1_seaall_sft4x_1_5a1_r2_0_dpo_8_40000s": "SeaLLM-13b",
127
- "polylm": "PolyLM-13b",
128
- "qwen": "Qwen-14b",
129
- "gpt-3.5-turbo": "GPT-3.5-turbo",
130
- "gpt-4-1106-preview": "GPT-4-turbo",
131
- }
132
- CATEGORIES = [ "task-solving", "math-reasoning", "general-instruction", "natural-question", "safety", ]
133
-
134
- CATEGORIES_NAMES = {
135
- "task-solving": 'Task-solving',
136
- "math-reasoning": 'Math',
137
- "general-instruction": 'General-instruction',
138
- "natural-question": 'NaturalQA',
139
- "safety": 'Safety',
140
- }
141
-
142
-
143
- # LANGS = ['en', 'vi', 'th', 'id', 'km', 'lo', 'ms', 'my', 'tl']
144
- LANGS = ['en', 'vi', 'id', 'ms', 'tl', 'th', 'km', 'lo', 'my']
145
- LANG_NAMES = {
146
- 'en': 'eng',
147
- 'vi': 'vie',
148
- 'th': 'tha',
149
- 'id': 'ind',
150
- 'km': 'khm',
151
- 'lo': 'lao',
152
- 'ms': 'msa',
153
- 'my': 'mya',
154
- 'tl': 'tgl',
155
-
156
- }
157
-
158
 
159
  def plot_agg_fn():
160
- df = get_model_df()
161
 
162
  all_models = df["model"].unique()
163
  model_names = list(rename_map.items())
@@ -228,7 +259,7 @@ def plot_agg_fn():
228
 
229
 
230
  def plot_by_lang_fn():
231
- df = get_model_df()
232
  model_names = list(rename_map.items())
233
 
234
  fig = make_subplots(
 
16
  CATEGORIES = ["task-solving", "math-reasoning", "general-instruction", "natural-question", "safety"]
17
  LANGS = ['en', 'vi', 'th', 'id', 'km', 'lo', 'ms', 'my', 'tl']
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  force_download = bool(int(os.environ.get("force_download", "1")))
21
  HF_TOKEN = str(os.environ.get("HF_TOKEN", ""))
 
24
 
25
  MODEL_DFRAME = None
26
 
27
+
28
# Benchmark question categories, in the order they are listed here.
# (Previously assigned twice with identical contents; one definition kept.)
CATEGORIES = ["task-solving", "math-reasoning", "general-instruction", "natural-question", "safety"]

# Runtime configuration, each overridable through an environment variable of
# the same name. FORCE_DOWNLOAD is parsed as an integer flag ("0" / "1").
FORCE_DOWNLOAD = bool(int(os.environ.get("FORCE_DOWNLOAD", "0")))
HF_TOKEN = str(os.environ.get("HF_TOKEN", ""))
DATA_SET_REPO_PATH = str(os.environ.get("DATA_SET_REPO_PATH", "SeaLLMs/Sea-bench"))

# File names inside the dataset repo: judgment records and question metadata.
PERFORMANCE_FILENAME = str(os.environ.get("PERFORMANCE_FILENAME", "model_judgment/gpt-4_single.jsonl"))
QUESTION_FILE_NAME = str(os.environ.get("QUESTION_FILE_NAME", "question.jsonl"))

# Maps a raw model identifier to the display name used for it.
rename_map = {
    "seallm-13b-chat": "SeaLLM-13b",
    "polylm-13b": "PolyLM-13b",
    "qwen-14b": "Qwen-14b",
    "gpt-3.5-turbo": "GPT-3.5-turbo",
}

# Display label for each entry of CATEGORIES.
CATEGORIES_NAMES = {
    "task-solving": 'Task-solving',
    "math-reasoning": 'Math',
    "general-instruction": 'General-instruction',
    "natural-question": 'NaturalQA',
    "safety": 'Safety',
}

# Language codes. An earlier assignment with a different ordering was
# overwritten before any use; this ordering is the effective one.
LANGS = ['en', 'vi', 'id', 'ms', 'tl', 'th', 'km', 'lo', 'my']

# Display label (three-letter code) for each entry of LANGS.
LANG_NAMES = {
    'en': 'eng',
    'vi': 'vie',
    'th': 'tha',
    'id': 'ind',
    'km': 'khm',
    'lo': 'lao',
    'ms': 'msa',
    'my': 'mya',
    'tl': 'tgl',
}

# Module-level slot for a loaded DataFrame; starts empty.
MODEL_DFRAME = None
70
+
71
+
72
def read_jsonl_report(question_path, file_path):
    """Load judgment records and annotate each with its question's metadata.

    Args:
        question_path: Path to a JSONL file of question records. Each record
            is read for its ``question_id``, ``category`` and ``lang`` keys.
        file_path: Path to a JSONL file of judgment records. Each record is
            read for its ``question_id`` key.

    Returns:
        A ``pandas.DataFrame`` with one row per judgment record, in file
        order, carrying the record's own fields plus ``category`` and
        ``lang`` copied from the matching question.

    Raises:
        KeyError: If a judgment refers to a ``question_id`` that is not in
            the question file, or a required key is missing.
    """
    # Explicit UTF-8: the locale default encoding is platform-dependent and
    # may fail to decode non-ASCII text.
    with open(question_path, 'r', encoding='utf-8') as f:
        questions = {}
        for raw in f:
            if not raw.strip():
                continue  # tolerate blank / trailing empty lines
            q = json.loads(raw)
            questions[q['question_id']] = q

    q2result = []
    # Context manager so the handle is closed even if parsing raises.
    with open(file_path, 'r', encoding='utf-8') as fin:
        for raw in fin:
            if not raw.strip():
                continue
            obj = json.loads(raw)
            question = questions[obj['question_id']]
            obj["category"] = question['category']
            obj["lang"] = question['lang']
            q2result.append(obj)

    return pd.DataFrame(q2result)
92
+
93
def get_report_df_from_jsonl():
    """Fetch the question and judgment files from the dataset repo and load them.

    Reads the module-level settings ``DATA_SET_REPO_PATH``, ``HF_TOKEN``,
    ``FORCE_DOWNLOAD``, ``QUESTION_FILE_NAME`` and ``PERFORMANCE_FILENAME``,
    downloads both files with ``hf_hub_download`` (``local_dir='./hf_cache'``,
    ``repo_type="dataset"``), prints the resulting paths, and hands them to
    ``read_jsonl_report``.

    Returns:
        Whatever ``read_jsonl_report`` returns for the two downloaded paths.

    Raises:
        ValueError: If ``DATA_SET_REPO_PATH`` or ``HF_TOKEN`` is empty.
    """
    from huggingface_hub import hf_hub_download

    # Raise explicitly instead of asserting: `assert` statements are removed
    # under `python -O`, which would let an empty setting reach the download.
    if DATA_SET_REPO_PATH == '':
        raise ValueError("DATA_SET_REPO_PATH must not be empty")
    if HF_TOKEN == '':
        raise ValueError("HF_TOKEN must not be empty")

    repo_id = DATA_SET_REPO_PATH

    def _download(filename):
        # Single place for the download arguments shared by both files.
        return hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            force_download=FORCE_DOWNLOAD,
            local_dir='./hf_cache',
            repo_type="dataset",
            token=HF_TOKEN,
        )

    # Same order as before: questions first, then the judgment file.
    question_path = _download(QUESTION_FILE_NAME)
    file_path = _download(PERFORMANCE_FILENAME)
    print(f'Downloaded file at {question_path}/ {file_path} from {DATA_SET_REPO_PATH} / {PERFORMANCE_FILENAME}')
    return read_jsonl_report(question_path, file_path)
116
+
117
+
118
  def get_model_df():
119
  global MODEL_DFRAME
120
  if isinstance(MODEL_DFRAME, pd.DataFrame):
 
186
  )
187
  fig.add_trace(polar, row, col)
188
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
 
190
  def plot_agg_fn():
191
+ df = get_report_df_from_jsonl()
192
 
193
  all_models = df["model"].unique()
194
  model_names = list(rename_map.items())
 
259
 
260
 
261
  def plot_by_lang_fn():
262
+ df = get_report_df_from_jsonl()
263
  model_names = list(rename_map.items())
264
 
265
  fig = make_subplots(