TITLE = '<h1 align="center" id="space-title">African Languages LLM Eval Leaderboard</h1>'
INTRO_TEXT = """
## About
This leaderboard tracks the progress and ranks the performance of large language models (LLMs) on African languages.
We currently evaluate models on the following benchmarks:
- <a href="https://arxiv.org/abs/2406.03368" target="_blank">IrokoBench</a>
  - AfriMMLU (Direct and Translate) (0-shot)
  - AfriXNLI (Direct and Translate) (0-shot)
- <a href="https://aclanthology.org/2024.acl-long.44" target="_blank">Belebele</a>
- <a href="https://arxiv.org/abs/2305.06897" target="_blank">AfriQA</a>
"""
HOW_TO = """
## How to list your model's performance on this leaderboard
Run the evaluation of your model using this repo: <a href="https://github.com/The-African-Research-Collective/jubilant-sniffle" target="_blank">https://github.com/The-African-Research-Collective/jubilant-sniffle</a>.
Then push the evaluation log and open a pull request.
"""
CREDIT = """
## Credit
This leaderboard builds on the following resources:
- Datasets (IrokoBench, AfriQA, Belebele)
- Evaluation code (EleutherAI's lm-evaluation-harness repo)
- Multilingual LLM Leaderboard code (UONLP's Open Multilingual LLM Evaluation Leaderboard)
"""
CITATION = f"""
## Citation
```
@misc{{tarescollmbenchmark,
author = {{tbd...}},
title={{tbd...}},
year={{2024}}
}}
```
"""