polish_medical_leaderboard

Running on CPU Upgrade

App Files Files Community

djstrong committed on Sep 15, 2024

Commit

76bf962

1 Parent(s): 11c52b6

expand references further

Browse files

Files changed (1) hide show

src/about.py +53 -3

src/about.py CHANGED Viewed

@@ -110,6 +110,7 @@ g_tasks = [task.value.benchmark for task in Tasks if task.value.type == "generat
 mc_tasks = [task.value.benchmark for task in Tasks if task.value.type == "multiple_choice"]
 rag_tasks = ['polish_polqa_reranking_multiple_choice', 'polish_polqa_open_book', 'polish_poquad_open_book']
 all_tasks = g_tasks + mc_tasks
 NUM_FEWSHOT = 0 # Change with your few shot
 # ---------------------------------------------------
@@ -245,12 +246,61 @@ If everything is done, check you can launch the EleutherAIHarness on your model
 """
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
-CITATION_BUTTON_TEXT = r"""
-@misc{open-pl-llm-leaderboard,
 	title        = {Polish Medical Leaderboard},
 	author       = {Wróbel, Krzysztof and {SpeakLeash Team} and {Cyfronet Team}},
 	year         = 2024,
 	publisher    = {Hugging Face},
 	howpublished = "\url{https://huggingface.co/spaces/speakleash/polish_medical_leaderboard}"
 }
-"""

 mc_tasks = [task.value.benchmark for task in Tasks if task.value.type == "multiple_choice"]
 rag_tasks = ['polish_polqa_reranking_multiple_choice', 'polish_polqa_open_book', 'polish_poquad_open_book']
 all_tasks = g_tasks + mc_tasks
+all_tasks.remove('polish_pes')
 NUM_FEWSHOT = 0 # Change with your few shot
 # ---------------------------------------------------
 """
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
+CITATION_BUTTON_TEXT = r"""@misc{polish-medical-llm-leaderboard,
 	title        = {Polish Medical Leaderboard},
 	author       = {Wróbel, Krzysztof and {SpeakLeash Team} and {Cyfronet Team}},
 	year         = 2024,
 	publisher    = {Hugging Face},
 	howpublished = "\url{https://huggingface.co/spaces/speakleash/polish_medical_leaderboard}"
 }
+@misc{eval-harness,
+  author       = {Gao, Leo and Tow, Jonathan and Abbasi, Baber and Biderman, Stella and Black, Sid and DiPofi, Anthony and Foster, Charles and Golding, Laurence and Hsu, Jeffrey and Le Noac'h, Alain and Li, Haonan and McDonell, Kyle and Muennighoff, Niklas and Ociepa, Chris and Phang, Jason and Reynolds, Laria and Schoelkopf, Hailey and Skowron, Aviya and Sutawika, Lintang and Tang, Eric and Thite, Anish and Wang, Ben and Wang, Kevin and Zou, Andy},
+  title        = {A framework for few-shot language model evaluation},
+  month        = 07,
+  year         = 2024,
+  publisher    = {Zenodo},
+  version      = {v0.4.3},
+  doi          = {10.5281/zenodo.12608602},
+  url          = {https://zenodo.org/records/12608602}
+}
+@misc{fwpes2024,
+  title={speakleash/PES-2018-2022},
+  author={Filipkowska, Maria and Wróbel, Krzysztof},
+  year={2024},
+  howpublished={\url{https://huggingface.co/datasets/speakleash/PES-2018-2022}},
+}
+@misc{pkgpes2024,
+  title={amu-cai/PES-2018-2022},
+  author={Jakub Pokrywka and Jeremi Kaczmarek and Edward Gorzelańczyk},
+  year={2024},
+  howpublished={\url{https://huggingface.co/datasets/amu-cai/PES-2018-2022}},
+}
+@misc{pokrywka2024gpt4,
+      title={GPT-4 passes most of the 297 written Polish Board Certification Examinations},
+      author={Jakub Pokrywka and Jeremi Kaczmarek and Edward Gorzelańczyk},
+      year={2024},
+      eprint={2405.01589},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL}
+}
+@misc{open-pl-llm-leaderboard,
+	title        = {Open PL LLM Leaderboard},
+	author       = {Wróbel, Krzysztof and {SpeakLeash Team} and {Cyfronet Team}},
+	year         = 2024,
+	publisher    = {Hugging Face},
+	howpublished = "\url{https://huggingface.co/spaces/speakleash/open_pl_llm_leaderboard}"
+}
+@misc{open-llm-leaderboard-v1,
+  author = {Edward Beeching and Clémentine Fourrier and Nathan Habib and Sheon Han and Nathan Lambert and Nazneen Rajani and Omar Sanseviero and Lewis Tunstall and Thomas Wolf},
+  title = {Open LLM Leaderboard (2023-2024)},
+  year = {2023},
+  publisher = {Hugging Face},
+  howpublished = "\url{https://huggingface.co/spaces/open-llm-leaderboard-old/open_llm_leaderboard}"
+}
+"""