Sean Cho committed
Commit a86ccea • 1 Parent(s): ce5c604

Update texts

Files changed (2)
  1. app.py +3 -3
  2. src/assets/text_content.py +12 -80
app.py CHANGED
@@ -322,7 +322,7 @@ with demo:
      )
      with gr.Row():
          deleted_models_visibility = gr.Checkbox(
-             value=True, label="Show gated/private/deleted models", interactive=True
+             value=True, label="👀 삭제/비공개된 모델도 함께 보기", interactive=True
          )
      with gr.Column(min_width=320):
          search_bar = gr.Textbox(
@@ -455,7 +455,7 @@ with demo:
          max_rows=5,
      )
      with gr.Accordion(
-         f"🔄 평가 진행 대기열 ({len(running_eval_queue_df)})",
+         f"🔄 평가 진행 중 ({len(running_eval_queue_df)})",
          open=False,
      ):
          with gr.Row():
@@ -467,7 +467,7 @@ with demo:
      )

      with gr.Accordion(
-         f"⏳ 평가 대기 대기열 ({len(pending_eval_queue_df)})",
+         f"⏳ 평가 대기 중 ({len(pending_eval_queue_df)})",
          open=False,
      ):
          with gr.Row():
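The three relabeled widgets above are plain Gradio components: a checkbox that controls whether deleted/private models stay visible (the new label reads roughly "also show deleted/private models"), and two accordions whose titles change from "evaluation running queue" / "evaluation waiting queue" to "evaluation in progress" / "waiting for evaluation", with the current queue size interpolated through an f-string. Below is a minimal sketch of that pattern, not the app's actual code; the two DataFrames are hypothetical stand-ins for the queue data the real app loads from the requests dataset.

```python
import gradio as gr
import pandas as pd

# Hypothetical queue data; the real app populates these from the eval-requests dataset.
running_eval_queue_df = pd.DataFrame({"model": ["org/model-a"]})
pending_eval_queue_df = pd.DataFrame({"model": ["org/model-b", "org/model-c"]})

with gr.Blocks() as demo:
    with gr.Row():
        # Checkbox that toggles visibility of deleted/private models.
        deleted_models_visibility = gr.Checkbox(
            value=True, label="👀 삭제/비공개된 모델도 함께 보기", interactive=True
        )
    # Accordion titles embed the queue sizes via f-strings evaluated at build time.
    with gr.Accordion(f"🔄 평가 진행 중 ({len(running_eval_queue_df)})", open=False):
        gr.Dataframe(value=running_eval_queue_df)
    with gr.Accordion(f"⏳ 평가 대기 중 ({len(pending_eval_queue_df)})", open=False):
        gr.Dataframe(value=pending_eval_queue_df)

if __name__ == "__main__":
    demo.launch()
```

Because the counts are baked into the f-strings when the UI is constructed, they only refresh when the demo is rebuilt or the page's refresh logic reruns.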
src/assets/text_content.py CHANGED
@@ -40,29 +40,18 @@ LLM 시대에 걸맞는 평가를 위해 상식, 전문 지식, 추론, 환각,
  
  KT로부터 평가에 사용되는 GPU를 제공받았습니다.
  
- ## Details and logs
- You can find:
+ ## 좀 더 자세한 정보
  - 좀 더 자세한 수치 정보는: https://huggingface.co/datasets/open-llm-leaderboard/results
  - 모델의 입출력에 대한 자세한 정보는: https://huggingface.co/datasets/open-llm-leaderboard/details
  - 모델의 평가 큐와 평가 상태는: https://huggingface.co/datasets/open-llm-leaderboard/requests
  
- ## Reproducibility
+ ## 결과 재현
  평가 결과를 재현하기 위해서는 [이 버전](https://github.com/EleutherAI/lm-evaluation-harness/tree/b281b0921b636bc36ad05c0b0b0763bd6dd43463)의 데이터셋을 이용하세요. (밑에는 코드 및 평가 환경이라서 일단 skip)
  
- The total batch size we get for models which fit on one A100 node is 16 (8 GPUs * 2). If you don't use parallelism, adapt your batch size to fit.
- *You can expect results to vary slightly for different batch sizes because of padding.*
-
- The tasks and few shots parameters are:
- - ARC: 25-shot, *arc-challenge* (`acc_norm`)
- - HellaSwag: 10-shot, *hellaswag* (`acc_norm`)
- - TruthfulQA: 0-shot, *truthfulqa-mc* (`mc2`)
- - MMLU: 5-shot, *hendrycksTest-abstract_algebra,hendrycksTest-anatomy,hendrycksTest-astronomy,hendrycksTest-business_ethics,hendrycksTest-clinical_knowledge,hendrycksTest-college_biology,hendrycksTest-college_chemistry,hendrycksTest-college_computer_science,hendrycksTest-college_mathematics,hendrycksTest-college_medicine,hendrycksTest-college_physics,hendrycksTest-computer_security,hendrycksTest-conceptual_physics,hendrycksTest-econometrics,hendrycksTest-electrical_engineering,hendrycksTest-elementary_mathematics,hendrycksTest-formal_logic,hendrycksTest-global_facts,hendrycksTest-high_school_biology,hendrycksTest-high_school_chemistry,hendrycksTest-high_school_computer_science,hendrycksTest-high_school_european_history,hendrycksTest-high_school_geography,hendrycksTest-high_school_government_and_politics,hendrycksTest-high_school_macroeconomics,hendrycksTest-high_school_mathematics,hendrycksTest-high_school_microeconomics,hendrycksTest-high_school_physics,hendrycksTest-high_school_psychology,hendrycksTest-high_school_statistics,hendrycksTest-high_school_us_history,hendrycksTest-high_school_world_history,hendrycksTest-human_aging,hendrycksTest-human_sexuality,hendrycksTest-international_law,hendrycksTest-jurisprudence,hendrycksTest-logical_fallacies,hendrycksTest-machine_learning,hendrycksTest-management,hendrycksTest-marketing,hendrycksTest-medical_genetics,hendrycksTest-miscellaneous,hendrycksTest-moral_disputes,hendrycksTest-moral_scenarios,hendrycksTest-nutrition,hendrycksTest-philosophy,hendrycksTest-prehistory,hendrycksTest-professional_accounting,hendrycksTest-professional_law,hendrycksTest-professional_medicine,hendrycksTest-professional_psychology,hendrycksTest-public_relations,hendrycksTest-security_studies,hendrycksTest-sociology,hendrycksTest-us_foreign_policy,hendrycksTest-virology,hendrycksTest-world_religions* (average of all the results `acc`)
-
- ## Quantization
- To get more information about quantization, see:
- - 8 bits: [blog post](https://huggingface.co/blog/hf-bitsandbytes-integration), [paper](https://arxiv.org/abs/2208.07339)
- - 4 bits: [blog post](https://huggingface.co/blog/4bit-transformers-bitsandbytes), [paper](https://arxiv.org/abs/2305.14314)
-
+ ## 더보기
+ 질문이 있으시면 [여기](https://huggingface.co/spaces/BearSean/leaderboard-test/discussions/1)서 FAQ를 확인하실 수 있습니다!
+ 또한 커뮤니티, 다른 팀 및 연구소에서 제공하는 멋진 자료를 여기에 모아 두었습니다!
+ If you still have questions, you can check our FAQ here! We also gather cool resources from the community, other teams, and other labs here!
  """
  
  EVALUATION_QUEUE_TEXT = f"""
@@ -98,68 +87,11 @@ safetensors는 weight를 보관하는 새로운 포맷으로, 훨씬 안전하
  
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
  CITATION_BUTTON_TEXT = r"""
- @misc{open-llm-leaderboard,
-   author = {Edward Beeching, Clémentine Fourrier, Nathan Habib, Sheon Han, Nathan Lambert, Nazneen Rajani, Omar Sanseviero, Lewis Tunstall, Thomas Wolf},
-   title = {Open LLM Leaderboard},
+ @misc{open-ko-llm-leaderboard,
+   author = {},
+   title = {Open Ko LLM Leaderboard},
    year = {2023},
-   publisher = {Hugging Face},
-   howpublished = "\url{https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard}"
- }
- @software{eval-harness,
-   author = {Gao, Leo and
-             Tow, Jonathan and
-             Biderman, Stella and
-             Black, Sid and
-             DiPofi, Anthony and
-             Foster, Charles and
-             Golding, Laurence and
-             Hsu, Jeffrey and
-             McDonell, Kyle and
-             Muennighoff, Niklas and
-             Phang, Jason and
-             Reynolds, Laria and
-             Tang, Eric and
-             Thite, Anish and
-             Wang, Ben and
-             Wang, Kevin and
-             Zou, Andy},
-   title = {A framework for few-shot language model evaluation},
-   month = sep,
-   year = 2021,
-   publisher = {Zenodo},
-   version = {v0.0.1},
-   doi = {10.5281/zenodo.5371628},
-   url = {https://doi.org/10.5281/zenodo.5371628}
+   publisher = {Upstage, National Information Society Agency},
+   howpublished = "\url{https://huggingface.co/spaces/BearSean/leaderboard-test/discussions/1}"
  }
- @misc{clark2018think,
-   title={Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challenge},
-   author={Peter Clark and Isaac Cowhey and Oren Etzioni and Tushar Khot and Ashish Sabharwal and Carissa Schoenick and Oyvind Tafjord},
-   year={2018},
-   eprint={1803.05457},
-   archivePrefix={arXiv},
-   primaryClass={cs.AI}
- }
- @misc{zellers2019hellaswag,
-   title={HellaSwag: Can a Machine Really Finish Your Sentence?},
-   author={Rowan Zellers and Ari Holtzman and Yonatan Bisk and Ali Farhadi and Yejin Choi},
-   year={2019},
-   eprint={1905.07830},
-   archivePrefix={arXiv},
-   primaryClass={cs.CL}
- }
- @misc{hendrycks2021measuring,
-   title={Measuring Massive Multitask Language Understanding},
-   author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
-   year={2021},
-   eprint={2009.03300},
-   archivePrefix={arXiv},
-   primaryClass={cs.CY}
- }
- @misc{lin2022truthfulqa,
-   title={TruthfulQA: Measuring How Models Mimic Human Falsehoods},
-   author={Stephanie Lin and Jacob Hilton and Owain Evans},
-   year={2022},
-   eprint={2109.07958},
-   archivePrefix={arXiv},
-   primaryClass={cs.CL}
- }"""
+ """