Sean Cho committed
Commit fced05c • 1 Parent(s): 3c0be00

update texts

Files changed (2):
  1. app.py +1 -1
  2. src/assets/text_content.py +13 -12
app.py CHANGED
@@ -500,7 +500,7 @@ with demo:
         with gr.Row():
             with gr.Column():
                 model_name_textbox = gr.Textbox(label="Model name")
-                revision_name_textbox = gr.Textbox(label="revision", placeholder="main")
+                revision_name_textbox = gr.Textbox(label="Revision", placeholder="main")
                 private = gr.Checkbox(False, label="Private", visible=not IS_PUBLIC)
                 model_type = gr.Dropdown(
                     choices=[
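For context, the relabeled textbox sits inside the leaderboard's Gradio submission form. Below is a minimal, self-contained sketch of such a form, assuming only what this hunk shows: the `IS_PUBLIC` default, the dropdown choices, and the `gr.Blocks` wrapper are illustrative placeholders, not the app's real values.

```python
# Minimal sketch of the submission-form fragment touched by this hunk.
# Field names mirror the diff; IS_PUBLIC and the dropdown choices are
# illustrative placeholders, not the leaderboard's real values.
import gradio as gr

IS_PUBLIC = True  # placeholder; the real app derives this from its environment

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            model_name_textbox = gr.Textbox(label="Model name")
            # The commit only capitalizes this label ("revision" -> "Revision");
            # "main" remains the greyed-out placeholder shown when the box is empty.
            revision_name_textbox = gr.Textbox(label="Revision", placeholder="main")
            private = gr.Checkbox(False, label="Private", visible=not IS_PUBLIC)
            model_type = gr.Dropdown(
                choices=["pretrained", "fine-tuned"],  # placeholder choices
                label="Model type",
            )

if __name__ == "__main__":
    demo.launch()
```

Note that a Textbox placeholder is only hint text: leaving the field empty still submits an empty string, so the app presumably falls back to `main` when it handles the submission.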
src/assets/text_content.py CHANGED
@@ -4,14 +4,15 @@ TITLE = """<img src="https://upstage-open-ko-llm-leaderboard-logos.s3.ap-northea
 BOTTOM_LOGO = """<img src="https://upstage-open-ko-llm-leaderboard-logos.s3.ap-northeast-2.amazonaws.com/footer_logo_1.png" style="width:40%;display:block;margin-left:auto;margin-right:auto">"""
 
 INTRODUCTION_TEXT = f"""
-🚀 The Open Ko-LLM Leaderboard 🇰🇷 objectively evaluates the performance of Korean large language model.
+🚀 The Open Ko-LLM Leaderboard 🇰🇷 objectively evaluates the performance of Korean Large Language Model (LLM).
 
 When you submit a model on the "Submit here!" page, it is automatically evaluated. The GPU used for evaluation is operated with the support of KT.
-The data used for evaluation consists of datasets to assess expertise, inference ability, hallucination, and common sense.
+We accept models with less than or equal to 13B parameters now.
+The data used for evaluation consists of datasets to assess reasoning, language understanding, hallucination, and commonsense.
 The evaluation dataset is exclusively private and only available for evaluation process.
 More detailed information about the benchmark dataset is provided on the “About” page.
 
-This leaderboard is co-hosted by __Upstage__ and __NIA__, and operated by __Upstage__.
+This leaderboard is co-hosted by __[Upstage](https://www.upstage.ai)__, and __[NIA](https://www.nia.or.kr/site/nia_kor/main.do)__ that provides various Korean Data Sets through __[AI-Hub](aihub.or.kr)__, and operated by __[Upstage](https://www.upstage.ai)__.
 """
 
 LLM_BENCHMARKS_TEXT = f"""
@@ -32,18 +33,18 @@ Please provide information about the model through an issue! 🤩
 ## How it works
 
 📈 We have set up a benchmark using datasets translated into Korean, and applied variations by human experts, from the four tasks (HellaSwag, MMLU, Arc, Truthful QA) operated by HuggingFace OpenLLM. We have also added a new dataset prepared from scratch.
-- Ko-HellaSwag (provided by __Upstage__, machine translation)
-- Ko-MMLU (provided by __Upstage__, human translation and variation)
-- Ko-Arc (provided by __Upstage__, human translation and variation)
-- Ko-Truthful QA (provided by __Upstage__, human translation and variation)
-- Ko-CommonGen V2 (provided by __Korea University NLP&AI Lab__, created from scratch)
+- Ko-HellaSwag (provided by __[Upstage](https://www.upstage.ai/)__, machine translation)
+- Ko-MMLU (provided by __[Upstage](https://www.upstage.ai/)__, human translation and variation)
+- Ko-Arc (provided by __[Upstage](https://www.upstage.ai/)__, human translation and variation)
+- Ko-Truthful QA (provided by __[Upstage](https://www.upstage.ai/)__, human translation and variation)
+- Ko-CommonGen V2 (provided by __[Korea University NLP&AI Lab](http://nlp.korea.ac.kr/)__, created from scratch)
 To provide an evaluation befitting the LLM era, we've selected benchmark datasets suitable for assessing these elements: expertise, inference, hallucination, and common sense. The final score is converted to the average score from each evaluation datasets.
 
-GPUs are provided by __KT__ for the evaluations.
+GPUs are provided by __[KT](https://www.kt.com/)__ for the evaluations.
 
 ## Details and Logs
 - Detailed numerical results in the `results` Upstage dataset: https://huggingface.co/datasets/open-ko-llm-leaderboard/results
-- community queries and running status in the `requests` Upstage dataset: https://huggingface.co/datasets/open-ko-llm-leaderboard/requests
+- Community queries and running status in the `requests` Upstage dataset: https://huggingface.co/datasets/open-ko-llm-leaderboard/requests
 
 ## More resources
 If you still have questions, you can check our FAQ [here](https://huggingface.co/spaces/upstage/open-ko-llm-leaderboard/discussions/1)!
@@ -85,9 +86,9 @@ CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 CITATION_BUTTON_TEXT = r"""
 @misc{open-ko-llm-leaderboard,
   author = {Chanjun Park, Hwalsuk Lee, Hyunbyung Park, Sanghoon Kim, Seonghwan Cho, Sunghun Kim, Sukyung Lee},
-  title = {Open Ko LLM Leaderboard},
+  title = {Open Ko-LLM Leaderboard},
   year = {2023},
   publisher = {Upstage, National Information Society Agency},
-  howpublished = "\url{https://huggingface.co/spaces/upstage/open-ko-llm-leaderboard/discussions/1}"
+  howpublished = "\url{https://huggingface.co/spaces/upstage/open-ko-llm-leaderboard}"
 }
 """