| [ | |
| { | |
| "name": "FLORES+", | |
| "author": "Meta", | |
| "author_url": "https://ai.meta.com", | |
| "url": "https://huggingface.co/datasets/openlanguagedata/flores_plus", | |
| "n_languages": 200, | |
| "tasks": ["translation"], | |
| "parallel": true, | |
| "translation": "human", | |
| "base": "FLORES", | |
| "implemented": true, | |
| "group": "Translation" | |
| }, | |
| { | |
| "name": "SIB-200", | |
| "author": "Academic", | |
| "author_url": null, | |
| "url": "https://huggingface.co/datasets/Davlan/sib200", | |
| "n_languages": 200, | |
| "tasks": ["classification"], | |
| "parallel": true, | |
| "translation": "human", | |
| "base": "FLORES", | |
| "implemented": true, | |
| "group": "Translation" | |
| }, | |
| { | |
| "name": "CCAligned", | |
| "author": "Meta", | |
| "author_url": "https://ai.meta.com", | |
| "url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual", | |
| "n_languages": 137, | |
| "tasks": [ | |
| "translation" | |
| ], | |
| "parallel": false, | |
| "group": "Translation" | |
| }, | |
| { | |
| "name": "OPUS Collection", | |
| "author": "Helsinki NLP", | |
| "author_url": null, | |
| "url": "https://opus.nlpl.eu", | |
| "n_languages": 747, | |
| "tasks": [ | |
| "translation" | |
| ], | |
| "parallel": false, | |
| "group": "Translation" | |
| }, | |
| { | |
| "name": "Global MMLU", | |
| "author": "Cohere", | |
| "author_url": "https://cohere.com", | |
| "url": "https://huggingface.co/datasets/CohereForAI/Global-MMLU", | |
| "n_languages": 42, | |
| "languages": [ | |
| "am", | |
| "ar", | |
| "bn", | |
| "cs", | |
| "de", | |
| "el", | |
| "en", | |
| "es", | |
| "fa", | |
| "fil", | |
| "fr", | |
| "ha", | |
| "he", | |
| "hi", | |
| "id", | |
| "ig", | |
| "it", | |
| "ja", | |
| "ko", | |
| "ky", | |
| "lt", | |
| "mg", | |
| "ms", | |
| "ne", | |
| "nl", | |
| "ny", | |
| "pl", | |
| "pt", | |
| "ro", | |
| "ru", | |
| "si", | |
| "sn", | |
| "so", | |
| "sr", | |
| "sv", | |
| "sw", | |
| "te", | |
| "tr", | |
| "uk", | |
| "vi", | |
| "yo", | |
| "zh" | |
| ], | |
| "tasks": [ | |
| "question_answering" | |
| ], | |
| "parallel": true, | |
| "translation": "mixed", | |
| "base": "MMLU", | |
| "implemented": true, | |
| "group": "Multitask Language Understanding" | |
| }, | |
| { | |
| "name": "MMMLU", | |
| "author": "OpenAI", | |
| "author_url": "https://openai.com", | |
| "url": "https://huggingface.co/datasets/openai/MMMLU", | |
| "n_languages": "14", | |
| "languages": [ | |
| "ar", | |
| "bn", | |
| "de", | |
| "es", | |
| "fr", | |
| "hi", | |
| "id", | |
| "it", | |
| "ja", | |
| "ko", | |
| "pt", | |
| "sw", | |
| "yo", | |
| "zh" | |
| ], | |
| "tasks": [ | |
| "question_answering" | |
| ], | |
| "parallel": true, | |
| "translation": "human", | |
| "base": "MMLU", | |
| "implemented": true, | |
| "group": "Multitask Language Understanding" | |
| }, | |
| { | |
| "name": "AfriMMLU", | |
| "author": "Masakhane", | |
| "author_url": "https://www.masakhane.io", | |
| "url": "https://huggingface.co/datasets/masakhane/afrimmlu", | |
| "n_languages": "17", | |
| "languages": [ | |
| "am", | |
| "en", | |
| "ee", | |
| "fr", | |
| "ha", | |
| "ig", | |
| "rw", | |
| "ln", | |
| "lg", | |
| "om", | |
| "sn", | |
| "st", | |
| "sw", | |
| "tw", | |
| "wo", | |
| "xh", | |
| "yo", | |
| "zu" | |
| ], | |
| "tasks": [ | |
| "question_answering" | |
| ], | |
| "parallel": true, | |
| "translation": "human", | |
| "base": "MMLU", | |
| "implemented": true, | |
| "group": "Multitask Language Understanding" | |
| }, | |
| { | |
| "name": "Okapi MMLU", | |
| "author": "Academic", | |
| "author_url": null, | |
| "url": "https://huggingface.co/datasets/jon-tow/okapi_mmlu", | |
| "n_languages": 26, | |
| "languages": [ | |
| "ar", | |
| "bn", | |
| "ca", | |
| "da", | |
| "de", | |
| "es", | |
| "eu", | |
| "fr", | |
| "gu", | |
| "hi", | |
| "hr", | |
| "hu", | |
| "hy", | |
| "id", | |
| "it", | |
| "kn", | |
| "ml", | |
| "mr", | |
| "ne", | |
| "nl", | |
| "pt", | |
| "ro", | |
| "ru", | |
| "sk", | |
| "sr", | |
| "sv", | |
| "ta", | |
| "te", | |
| "uk", | |
| "vi", | |
| "zh" | |
| ], | |
| "tasks": [ | |
| "question_answering" | |
| ], | |
| "parallel": true, | |
| "translation": "machine", | |
| "base": "MMLU", | |
| "implemented": true, | |
| "group": "Multitask Language Understanding" | |
| }, | |
| { | |
| "name": "MMLU-X", | |
| "author": "OpenGPT-X", | |
| "author_url": "https://opengpt-x.de", | |
| "url": "https://huggingface.co/datasets/openGPT-X/mmlux", | |
| "n_languages": 20, | |
| "languages": [ | |
| "bg", | |
| "cs", | |
| "da", | |
| "de", | |
| "el", | |
| "es", | |
| "et", | |
| "fi", | |
| "fr", | |
| "hu", | |
| "it", | |
| "lt", | |
| "lv", | |
| "nl", | |
| "pl", | |
| "pt", | |
| "ro", | |
| "sk", | |
| "sl", | |
| "sv" | |
| ], | |
| "tasks": [ | |
| "question_answering" | |
| ], | |
| "parallel": true, | |
| "translation": "machine", | |
| "base": "MMLU", | |
| "implemented": false, | |
| "group": "Multitask Language Understanding" | |
| }, | |
| { | |
| "name": "MGSM", | |
| "author": "Google", | |
| "author_url": "https://google.com", | |
| "url": "https://huggingface.co/datasets/juletxara/mgsm", | |
| "n_languages": 10, | |
| "tasks": [ | |
| "math" | |
| ], | |
| "parallel": true, | |
| "base": "MGSM", | |
| "implemented": true, | |
| "group": "Grade School Math" | |
| }, | |
| { | |
| "name": "AfriMGSM", | |
| "author": "Masakhane", | |
| "author_url": "https://www.masakhane.io", | |
| "url": "https://huggingface.co/datasets/masakhane/afrimgsm", | |
| "n_languages": 18, | |
| "tasks": [ | |
| "math" | |
| ], | |
| "parallel": true, | |
| "translation": "human", | |
| "base": "MGSM", | |
| "implemented": true, | |
| "group": "Grade School Math" | |
| }, | |
| { | |
| "name": "GSM8K-X", | |
| "author": "OpenGPT-X", | |
| "author_url": "https://opengpt-x.de", | |
| "url": "https://huggingface.co/datasets/openGPT-X/gsm8kx", | |
| "n_languages": 20, | |
| "tasks": [ | |
| "math" | |
| ], | |
| "parallel": true, | |
| "translation": "machine", | |
| "base": "MGSM", | |
| "implemented": true, | |
| "group": "Grade School Math" | |
| }, | |
| { | |
| "name": "FLEURS", | |
| "author": "Meta", | |
| "author_url": "https://ai.meta.com", | |
| "url": "https://huggingface.co/datasets/google/fleurs", | |
| "n_languages": 102, | |
| "tasks": [ | |
| "speech_recognition" | |
| ], | |
| "parallel": true, | |
| "translation": "human", | |
| "base": "FLORES", | |
| "implemented": false, | |
| "group": "Speech Recognition" | |
| }, | |
| { | |
| "name": "CommonVoice", | |
| "author": "Mozilla", | |
| "author_url": "https://blog.mozilla.ai", | |
| "url": "https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0", | |
| "n_languages": 124, | |
| "tasks": [ | |
| "speech_recognition" | |
| ], | |
| "parallel": null, | |
| "translation": "human", | |
| "group": "Speech Recognition" | |
| }, | |
| { | |
| "name": "WorldCuisines", | |
| "author": "Academic", | |
| "author_url": "https://worldcuisines.github.io", | |
| "url": "https://huggingface.co/datasets/worldcuisines/vqa", | |
| "n_languages": 30, | |
| "tasks": [ | |
| "visual_question_answering" | |
| ], | |
| "parallel": null, | |
| "group": "Visual Question Answering" | |
| }, | |
| { | |
| "name": "CVQA", | |
| "author": "Academic", | |
| "author_url": null, | |
| "url": "https://huggingface.co/datasets/afaji/cvqa", | |
| "n_languages": 39, | |
| "tasks": [ | |
| "visual_question_answering" | |
| ], | |
| "parallel": null, | |
| "group": "Visual Question Answering" | |
| }, | |
| { | |
| "name": "Uhuru ARC Easy", | |
| "author": "Masakhane", | |
| "author_url": "https://www.masakhane.io", | |
| "url": "https://huggingface.co/datasets/masakhane/uhura-arc-easy", | |
| "n_languages": 6, | |
| "tasks": [ | |
| "question_answering" | |
| ], | |
| "parallel": true, | |
| "translation": "human", | |
| "base": "AI2 ARC", | |
| "implemented": false, | |
| "group": "Abstract Reasoning" | |
| }, | |
| { | |
| "name": "Okapi ARC Challenge", | |
| "author": "Academic", | |
| "author_url": null, | |
| "url": "https://huggingface.co/datasets/jon-tow/okapi_arc_challenge", | |
| "n_languages": 31, | |
| "tasks": [ | |
| "question_answering" | |
| ], | |
| "parallel": true, | |
| "translation": "machine", | |
| "base": "AI2 ARC", | |
| "implemented": false, | |
| "group": "Abstract Reasoning" | |
| }, | |
| { | |
| "name": "Arc-X", | |
| "author": "OpenGPT-X", | |
| "author_url": "https://opengpt-x.de", | |
| "url": "https://huggingface.co/datasets/openGPT-X/arcx", | |
| "n_languages": 20, | |
| "tasks": [ | |
| "question_answering" | |
| ], | |
| "parallel": true, | |
| "translation": "machine", | |
| "base": "AI2 ARC", | |
| "implemented": false, | |
| "group": "Abstract Reasoning" | |
| }, | |
| { | |
| "name": "Uhura TruthfulQA", | |
| "author": "Masakhane", | |
| "author_url": "https://www.masakhane.io", | |
| "url": "https://huggingface.co/datasets/masakhane/uhura-truthfulqa", | |
| "n_languages": 6, | |
| "tasks": [ | |
| "question_answering" | |
| ], | |
| "parallel": true, | |
| "translation": "human", | |
| "base": "TruthfulQA", | |
| "implemented": false, | |
| "group": "Truthfulness" | |
| }, | |
| { | |
| "name": "Okapi TruthfulQA", | |
| "author": "Academic", | |
| "author_url": null, | |
| "url": "https://huggingface.co/datasets/jon-tow/okapi_truthfulqa/tree/main/data", | |
| "n_languages": 31, | |
| "tasks": [ | |
| "question_answering" | |
| ], | |
| "parallel": true, | |
| "translation": "machine", | |
| "base": "TruthfulQA", | |
| "implemented": false, | |
| "group": "Truthfulness" | |
| }, | |
| { | |
| "name": "TruthfulQA-X", | |
| "author": "OpenGPT-X", | |
| "author_url": "https://opengpt-x.de", | |
| "url": "https://huggingface.co/datasets/openGPT-X/truthfulqax", | |
| "n_languages": 20, | |
| "tasks": [ | |
| "question_answering" | |
| ], | |
| "parallel": true, | |
| "translation": "machine", | |
| "base": "TruthfulQA", | |
| "implemented": false, | |
| "group": "Truthfulness" | |
| }, | |
| { | |
| "name": "XNLI", | |
| "author": "Meta", | |
| "author_url": "https://ai.meta.com", | |
| "url": "https://huggingface.co/datasets/facebook/xnli", | |
| "n_languages": 14, | |
| "tasks": [ | |
| "classification", | |
| "logic" | |
| ], | |
| "parallel": true, | |
| "base": "MNLI", | |
| "group": "Natural Language Inference" | |
| }, | |
| { | |
| "name": "AfriXNLI", | |
| "author": "Masakhane", | |
| "author_url": "https://www.masakhane.io", | |
| "url": "https://huggingface.co/datasets/masakhane/afrixnli", | |
| "n_languages": 18, | |
| "tasks": [ | |
| "classification", | |
| "logic" | |
| ], | |
| "parallel": true, | |
| "translation": "human", | |
| "base": "MNLI", | |
| "implemented": false, | |
| "group": "Natural Language Inference" | |
| }, | |
| { | |
| "name": "XGLUE", | |
| "author": "Microsoft", | |
| "author_url": "https://microsoft.ai", | |
| "url": "https://huggingface.co/datasets/microsoft/xglue", | |
| "n_languages": 18, | |
| "tasks": [ | |
| "pos" | |
| ], | |
| "parallel": null, | |
| "base": "GLUE", | |
| "group": "General Language Understanding" | |
| }, | |
| { | |
| "name": "IndicGLUE", | |
| "author": "AI4Bharat", | |
| "author_url": "https://models.ai4bharat.org", | |
| "url": "https://huggingface.co/datasets/ai4bharat/indic_glue", | |
| "n_languages": 11, | |
| "tasks": [ | |
| "question_answering" | |
| ], | |
| "parallel": null, | |
| "base": "GLUE", | |
| "group": "General Language Understanding" | |
| }, | |
| { | |
| "name": "Okapi HellaSwag", | |
| "author": "Academic", | |
| "author_url": null, | |
| "url": "https://huggingface.co/datasets/jon-tow/okapi_hellaswag", | |
| "n_languages": 31, | |
| "tasks": [ | |
| "question_answering" | |
| ], | |
| "parallel": true, | |
| "translation": "machine", | |
| "base": "HellaSwag", | |
| "implemented": false, | |
| "group": "Adversarial Language Modelling" | |
| }, | |
| { | |
| "name": "HellaSwag-X", | |
| "author": "OpenGPT-X", | |
| "author_url": "https://opengpt-x.de", | |
| "url": "https://huggingface.co/datasets/openGPT-X/hellaswagx", | |
| "n_languages": 20, | |
| "tasks": [ | |
| "question_answering" | |
| ], | |
| "parallel": true, | |
| "translation": "machine", | |
| "base": "HellaSwag", | |
| "implemented": false, | |
| "group": "Adversarial Language Modelling" | |
| }, | |
| { | |
| "name": "WikiANN / PAN-X", | |
| "author": "Academic", | |
| "author_url": null, | |
| "url": "https://huggingface.co/datasets/unimelb-nlp/wikiann", | |
| "n_languages": 176, | |
| "tasks": [ | |
| "ner" | |
| ], | |
| "parallel": false, | |
| "group": "Named Entity Recognition" | |
| }, | |
| { | |
| "name": "MasakhaNER", | |
| "author": "Masakhane", | |
| "author_url": "https://www.masakhane.io", | |
| "url": "https://huggingface.co/datasets/masakhane/masakhaner", | |
| "n_languages": 10, | |
| "tasks": [ | |
| "ner" | |
| ], | |
| "parallel": null, | |
| "group": "Named Entity Recognition" | |
| }, | |
| { | |
| "name": "Tülu 3 SFT Mixture", | |
| "author": "AllenAI", | |
| "author_url": "https://allenai.org", | |
| "url": "https://huggingface.co/datasets/allenai/tulu-3-sft-mixture", | |
| "n_languages": 70, | |
| "tasks": [ | |
| "instruction_following" | |
| ], | |
| "parallel": false, | |
| "group": "Instruction Following" | |
| }, | |
| { | |
| "name": "xP3", | |
| "author": "BigScience", | |
| "author_url": "https://bigscience.huggingface.co", | |
| "url": "https://huggingface.co/datasets/bigscience/xP3", | |
| "n_languages": 46, | |
| "tasks": [ | |
| "instruction_following" | |
| ], | |
| "parallel": false, | |
| "group": "Instruction Following" | |
| }, | |
| { | |
| "name": "Aya", | |
| "author": "Cohere", | |
| "author_url": "https://cohere.com", | |
| "url": "https://huggingface.co/datasets/CohereForAI/aya_dataset", | |
| "n_languages": 65, | |
| "tasks": [ | |
| "instruction_following" | |
| ], | |
| "parallel": null, | |
| "group": "Instruction Following" | |
| }, | |
| { | |
| "name": "SEA-IFEVAL", | |
| "author": "AI Singapore", | |
| "author_url": "https://aisingapore.org", | |
| "url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval", | |
| "n_languages": 7, | |
| "tasks": [ | |
| "instruction_following" | |
| ], | |
| "parallel": true, | |
| "base": "IFEVAL", | |
| "group": "Instruction Following" | |
| }, | |
| { | |
| "name": "Babel-670", | |
| "author": "Academic", | |
| "author_url": null, | |
| "url": "https://github.com/UBC-NLP/Babel-670-Language-Identification", | |
| "n_languages": 670, | |
| "tasks": [ | |
| "language_identification" | |
| ], | |
| "parallel": false, | |
| "group": "Other Tasks" | |
| }, | |
| { | |
| "name": "CulturaX", | |
| "author": "Academic", | |
| "author_url": null, | |
| "url": "https://huggingface.co/datasets/uonlp/CulturaX", | |
| "n_languages": 167, | |
| "tasks": [ | |
| "language_modeling" | |
| ], | |
| "parallel": false, | |
| "group": "Other Tasks" | |
| }, | |
| { | |
| "name": "XTREME", | |
| "author": "Google", | |
| "author_url": "https://google.com", | |
| "url": "https://huggingface.co/datasets/google/xtreme", | |
| "n_languages": 40, | |
| "tasks": [ | |
| "translation", | |
| "classification", | |
| "question_answering", | |
| "ner" | |
| ], | |
| "parallel": null, | |
| "group": "Other Tasks" | |
| }, | |
| { | |
| "name": "XLSUM", | |
| "author": "Academic", | |
| "author_url": null, | |
| "url": "https://huggingface.co/datasets/csebuetnlp/xlsum", | |
| "n_languages": 45, | |
| "tasks": [ | |
| "summarization" | |
| ], | |
| "parallel": true, | |
| "group": "Other Tasks" | |
| }, | |
| { | |
| "name": "MSVAMP", | |
| "author": "Microsoft", | |
| "author_url": "https://microsoft.ai", | |
| "url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP", | |
| "n_languages": 10, | |
| "tasks": [ | |
| "math" | |
| ], | |
| "parallel": true, | |
| "group": "Other Tasks" | |
| }, | |
| { | |
| "name": "Multilingual Sentiments", | |
| "author": "Academic", | |
| "author_url": null, | |
| "url": "https://huggingface.co/datasets/tyqiangz/multilingual-sentiments", | |
| "n_languages": 12, | |
| "tasks": [ | |
| "sentiment_analysis" | |
| ], | |
| "parallel": null, | |
| "group": "Other Tasks" | |
| }, | |
| { | |
| "name": "Lanfrica", | |
| "author": "Lanfrica", | |
| "author_url": "https://lanfrica.com", | |
| "url": "https://lanfrica.com/records?language=yor&task=machine%20translation", | |
| "n_languages": 2200, | |
| "tasks": [ | |
| "datasets" | |
| ], | |
| "parallel": null, | |
| "group": "Dataset Collections" | |
| }, | |
| { | |
| "name": "HuggingFace Languages", | |
| "author": "HuggingFace", | |
| "author_url": "https://huggingface.co", | |
| "url": "https://huggingface.co/languages", | |
| "n_languages": 4680, | |
| "tasks": [ | |
| "datasets", | |
| "models" | |
| ], | |
| "parallel": null, | |
| "group": "Dataset Collections" | |
| }, | |
| { | |
| "name": "HuggingFace Multilingual Datasets", | |
| "author": "HuggingFace", | |
| "author_url": "https://huggingface.co", | |
| "url": "https://huggingface.co/datasets?other=multilinguality:multilingual", | |
| "n_languages": 2012, | |
| "tasks": [ | |
| "datasets" | |
| ], | |
| "parallel": false, | |
| "group": "Dataset Collections" | |
| } | |
| ] |