binwang committed on
Commit
1ad7fa2
1 Parent(s): 96acb28
Files changed (1)
  1. app.py +6 -30
app.py CHANGED
@@ -2193,7 +2193,6 @@ MRPC_FIVE_SHOT = get_data_mrpc(eval_mode="five_shot")
 # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
 # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
 
-# block = gr.Blocks(theme=gr.themes.Soft())
 
 theme = gr.themes.Soft().set(
     background_fill_primary='*secondary_50'
@@ -2204,13 +2203,13 @@ block = gr.Blocks(theme='rottenlittlecreature/Moon_Goblin')
 
 with block:
     gr.Markdown(f"""
-        ## SeaEval Leaderboard. To submit, refer to the <a href="https://seaeval.github.io/" target="_blank" style="text-decoration: underline">SeaEval Website</a>. Refer to the [SeaEval paper](https://arxiv.org/abs/2309.04766) for details on metrics, tasks and models.
+        ### SeaEval Leaderboard. To submit, refer to the <a href="https://seaeval.github.io/" target="_blank" style="text-decoration: underline">SeaEval Website</a>. Refer to the [SeaEval paper](https://arxiv.org/abs/2309.04766) for details on metrics, tasks and models.
         - **Number of Datasets**: > 30
         - **Number of Languages**: > 8
         - **Number of Models**: {NUM_MODELS}
         - **Mode of Evaluation**: Zero-Shot, Five-Shot
 
-        ## Known Issues:
+        ### Known Issues:
         - For base models, the output is not truncated because no EOS token is detected. Evaluation could be affected, especially with length-aware metrics.
 
         ### The following table shows the performance of the models on the SeaEval benchmark.
@@ -2220,11 +2219,7 @@ with block:
 
         """)
 
-
-
     with gr.Tabs():
-
-
         with gr.TabItem("Cross-Lingual Consistency"):
 
             # dataset 1: cross-mmlu
@@ -3225,8 +3220,8 @@ with block:
         """)
 
 
-    gr.Markdown(f"""
-        ## If our datasets and leaderboard are useful, please consider citing:
+    gr.Markdown(r"""
+        ### If our datasets and leaderboard are useful, please consider citing:
        ```bibtex
        @article{SeaEval,
          title={SeaEval for Multilingual Foundation Models: From Cross-Lingual Alignment to Cultural Reasoning},
@@ -3237,25 +3232,6 @@ with block:
         """)
 
 
-# Running the functions on page load in addition to when the button is clicked
-# This is optional - if deactivated, the data loaded at "Build time" is shown, like for the Overall tab
-"""
-block.load(get_mteb_data, inputs=[task_bitext_mining], outputs=data_bitext_mining)
-"""
-
-
-
-
-
 block.queue(max_size=10)
-block.launch(server_name="0.0.0.0", share=False)
-
-
-# Possible changes:
-# Could add graphs / other visual content
-# Could add verification marks
-
-# Sources:
-# https://huggingface.co/spaces/gradio/leaderboard
-# https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard
-# https://getemoji.com/
+# block.launch(server_name="0.0.0.0", share=False)
+block.launch(server_name="0.0.0.0", share=True)
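A note on the theming lines kept above: the commit leaves `theme = gr.themes.Soft().set(background_fill_primary='*secondary_50')` in place while `block = gr.Blocks(theme='rottenlittlecreature/Moon_Goblin')` passes a Hub theme id instead, so the local `theme` object appears unused. A minimal sketch of the two ways Gradio accepts a theme (the variable names here are illustrative, not from the commit):

```python
import gradio as gr

# Path 1: build a theme object locally and override CSS variables;
# '*secondary_50' references another variable in the theme's palette.
soft_theme = gr.themes.Soft().set(
    background_fill_primary='*secondary_50',
)

# Path 2: pass a Hugging Face Hub repo id as a string, e.g.
# gr.Blocks(theme='rottenlittlecreature/Moon_Goblin'), which loads a
# community theme. Only the theme actually passed to Blocks takes effect.
with gr.Blocks(theme=soft_theme) as demo:
    gr.Markdown("theme demo")
```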
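The `gr.Markdown(f"""` → `gr.Markdown(r"""` switch on the citation block is more than cosmetic: in an f-string, the literal braces of BibTeX (`@article{SeaEval,` ...) are parsed as replacement fields, which is a syntax error here, while a raw (or plain) string passes them through. A small sketch of the difference, assuming nothing beyond standard Python:

```python
# Plain or raw triple-quoted strings keep BibTeX braces as-is:
citation = r"""
@article{SeaEval,
  title={SeaEval for Multilingual Foundation Models: From Cross-Lingual Alignment to Cultural Reasoning},
}
"""

# The same text as an f-string fails to parse, because {SeaEval, ...}
# is treated as an expression to interpolate. compile() surfaces the error:
try:
    compile('f"""@article{SeaEval, title={x}}"""', "<demo>", "eval")
except SyntaxError as err:
    print("f-string version fails:", err)

print(citation)
```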
 
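The deleted commented-out snippet used `Blocks.load`, which registers a function to run on every page load so the displayed data can refresh instead of showing the build-time snapshot. A minimal sketch of that pattern, with a hypothetical `refresh` function standing in for the `get_mteb_data` call in the removed code:

```python
import datetime
import gradio as gr

def refresh():
    # Hypothetical stand-in for a data-reloading function.
    return f"page loaded at {datetime.datetime.now():%H:%M:%S}"

with gr.Blocks() as block:
    status = gr.Textbox(label="Status")
    # Runs on every page (re)load, in addition to any button-triggered events.
    block.load(refresh, inputs=None, outputs=status)
```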
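The launch change is the functional core of the commit: `share=False` becomes `share=True`, which asks Gradio to open a temporary public `*.gradio.live` tunnel in addition to the local server. A sketch of the resulting serving configuration, assuming the surrounding app is unchanged:

```python
import gradio as gr

with gr.Blocks() as block:
    gr.Markdown("SeaEval leaderboard placeholder")

# Bound the request queue so traffic bursts wait instead of overloading the app.
block.queue(max_size=10)

# server_name="0.0.0.0" listens on all interfaces; share=True additionally
# requests a temporary public *.gradio.live link (share=False, the previous
# setting, serves only on the bound address).
block.launch(server_name="0.0.0.0", share=True)
```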