Spaces:
Running
Running
{ | |
"tquad": { | |
"name": "TQUAD", | |
"task": "extractive_question_answering", | |
"description": "This dataset is the Turkish Question & Answer dataset on Turkish & Islamic Science History within the scope of Teknofest 2018 Artificial Intelligence competition.", | |
"url": "https://github.com/TQuad/turkish-nlp-qa-dataset", | |
"hf_name": "mcemilg/tquad", | |
"generative": false | |
}, | |
"xquad_tr": { | |
"name": "XQUAD", | |
"task": "extractive_question_answering", | |
"description": "XQuAD (Cross-lingual Question Answering Dataset) is a benchmark dataset for evaluating cross-lingual question answering performance. The dataset consists of a subset of 240 paragraphs and 1190 question-answer pairs from the development set of SQuAD v1.1 together with their professional translations into ten languages: Spanish, German, Greek, Russian, Turkish, Arabic, Vietnamese, Thai, Chinese, and Hindi..", | |
"url": "https://github.com/google-deepmind/xquad", | |
"hf_name": "google/xquad", | |
"generative": false | |
}, | |
"mkqa_tr": { | |
"name": "MKQA", | |
"task": "extractive_question_answering", | |
"description": "MKQA: Multilingual Knowledge Questions & Answers. MKQA includes 10k open-domain question-answer pairs in 26 languages, resulting 260k examples in total.", | |
"url": "https://github.com/apple/ml-mkqa", | |
"hf_name": "mcemilg/mkqa_tr", | |
"generative": false | |
}, | |
"xlsum_tr": { | |
"name": "XLSum", | |
"task": "summarization", | |
"description": "Abstractive summarization dataset for 44 languages.", | |
"url": "https://github.com/csebuetnlp/xl-sum", | |
"hf_name": "csebuetnlp/xlsum", | |
"generative": true | |
}, | |
"mlsum_tr": { | |
"name": "MLSum", | |
"task": "summarization", | |
"description": "A multilingual summarization dataset collected from the newspapers' websites. MLSum contains 1.5M examples in 5 languages including Turkish.", | |
"url": "https://huggingface.co/datasets/reciTAL/mlsum", | |
"hf_name": "reciTAL/mlsum", | |
"generative": true | |
}, | |
"wiki_lingua_tr": { | |
"name": "WikiLingua", | |
"task": "summarization", | |
"description": "A multilingual abstractive summarization dataset covering 17 languages.", | |
"url": "https://github.com/esdurmus/Wikilingua", | |
"hf_name": "GEM/wiki_lingua", | |
"generative": true | |
}, | |
"tr-wikihow-summ": { | |
"name": "WikiHowSumm", | |
"task": "summarization", | |
"description": "A summarization dataset obtained from WikiHow website.", | |
"url": "https://huggingface.co/datasets/ardauzunoglu/tr-wikihow-summ", | |
"hf_name": "ardauzunoglu/tr-wikihow-summ", | |
"generative": true | |
}, | |
"mnli_tr": { | |
"name": "MNLI", | |
"task": "natural_language_inference", | |
"description": "Multi-Genre NLI (MNLI) dataset.", | |
"url": "https://cims.nyu.edu/~sbowman/multinli/", | |
"hf_name": "boun-tabi/nli_tr", | |
"generative": false | |
}, | |
"snli_tr": { | |
"name": "SNLI", | |
"task": "natural_language_inference", | |
"description": "The Stanford NLI (SNLI) dataset.", | |
"url": "https://nlp.stanford.edu/projects/snli/", | |
"hf_name": "boun-tabi/nli_tr", | |
"generative": false | |
}, | |
"xnli_tr": { | |
"name": "XNLI", | |
"task": "natural_language_inference", | |
"description": "The Cross-Lingual NLI (XNLI) dataset.", | |
"url": "https://github.com/facebookresearch/XNLI", | |
"hf_name": "boun-tabi/nli_tr", | |
"generative": false | |
}, | |
"xcopa_tr": { | |
"name": "XCOPA", | |
"task": "multiple_choice", | |
"description": "A multilingual dataset for evaluating causal commonsense reasoning capabilities of language models.", | |
"url": "https://github.com/cambridgeltl/xcopa", | |
"hf_name": "cambridgeltl/xcopa", | |
"generative": false | |
}, | |
"exams_tr": { | |
"name": "Exams", | |
"task": "multiple_choice", | |
"description": "A question answering dataset covering high school exams.", | |
"url": "https://huggingface.co/datasets/exams", | |
"hf_name": "exams", | |
"generative": false | |
}, | |
"belebele_tr": { | |
"name": "Belebele", | |
"task": "multiple_choice", | |
"description": "A multiple choice question answering dataset to evaluate machine comprehension.", | |
"url": "https://github.com/facebookresearch/belebele", | |
"generative": false | |
}, | |
"turkish_plu_goal_inference": { | |
"name": "PLU-GI", | |
"task": "multiple_choice", | |
"description": "TurkishPLU - Goal Inference task.", | |
"url": "https://github.com/GGLAB-KU/turkish-plu", | |
"hf_name": "mcemilg/turkish-plu-goal-inference", | |
"generative": false | |
}, | |
"turkish_plu_next_event_prediction": { | |
"name": "PLU-NE", | |
"task": "multiple_choice", | |
"description": "TurkishPLU - Next Event Prediction task.", | |
"url": "https://github.com/GGLAB-KU/turkish-plu", | |
"hf_name": "mcemilg/turkish-plu-next-event-prediction", | |
"generative": false | |
}, | |
"turkish_plu_step_inference": { | |
"name": "PLU-SI", | |
"task": "multiple_choice", | |
"description": "TurkishPLU - Step Inference task.", | |
"url": "https://github.com/GGLAB-KU/turkish-plu", | |
"hf_name": "mcemilg/turkish-plu-step-inference", | |
"generative": false | |
}, | |
"turkish_plu_step_ordering": { | |
"name": "PLU-SO", | |
"task": "multiple_choice", | |
"description": "TurkishPLU - Step Ordering task.", | |
"url": "https://github.com/GGLAB-KU/turkish-plu", | |
"hf_name": "mcemilg/turkish-plu-step-ordering", | |
"generative": false | |
}, | |
"sts_tr": { | |
"name": "STS", | |
"task": "text_classification", | |
"description": "The machine-translated Semantic Textual Similarity dataset in Turkish.", | |
"url": "https://github.com/emrecncelik/sts-benchmark-tr", | |
"hf_name": "emrecan/stsb-mt-turkish", | |
"generative": false | |
}, | |
"offenseval_tr": { | |
"name": "OffensEval", | |
"task": "text_classification", | |
"description": "A dataset for offensive speech recognition in Turkish.", | |
"url": "https://sites.google.com/site/offensevalsharedtask/offenseval-2020", | |
"hf_name": "coltekin/offenseval2020_tr", | |
"generative": false | |
}, | |
"news_cat": { | |
"name": "NewsCat", | |
"task": "text_classification", | |
"description": "News classification dataset collected from Turkish newspapers websites.", | |
"url": "http://www.kemik.yildiz.edu.tr/veri_kumelerimiz.html", | |
"hf_name": "mcemilg/news-cat", | |
"generative": false | |
}, | |
"ironytr": { | |
"name": "IronyTR", | |
"task": "text_classification", | |
"description": "Irony detection dataset in Turkish.", | |
"url": "https://github.com/teghub/IronyTR", | |
"hf_name": "mcemilg/IronyTR", | |
"generative": false | |
}, | |
"wmt-tr-en-prompt": { | |
"name": "WMT", | |
"task": "machine_translation", | |
"description": "English-to-Turkish machine translation dataset.", | |
"url": "http://www.aclweb.org/anthology/W/W16/W16-2301", | |
"hf_name": "wmt/wmt16", | |
"generative": true | |
}, | |
"gecturk_generation": { | |
"name": "GECTurk", | |
"task": "grammatical_error_correction", | |
"description": "A dataset for grammatical error correction.", | |
"url": "https://github.com/GGLAB-KU/gecturk", | |
"hf_name": "mcemilg/GECTurk-generation", | |
"generative": true | |
} | |
} |