Spaces:

KUIS-AI
/

Cetvel

Running

Cetvel / data /datasets.json

Ilker Kesen

initialize the first version

500fbd7 7 months ago

7.63 kB

	{
	"tquad": {
	"name": "TQUAD",
	"task": "extractive_question_answering",
	"description": "This dataset is the Turkish Question & Answer dataset on Turkish & Islamic Science History, prepared within the scope of the Teknofest 2018 Artificial Intelligence competition.",
	"url": "https://github.com/TQuad/turkish-nlp-qa-dataset",
	"hf_name": "mcemilg/tquad",
	"generative": false
	},
	"xquad_tr": {
	"name": "XQUAD",
	"task": "extractive_question_answering",
	"description": "XQuAD (Cross-lingual Question Answering Dataset) is a benchmark dataset for evaluating cross-lingual question answering performance. The dataset consists of a subset of 240 paragraphs and 1190 question-answer pairs from the development set of SQuAD v1.1 together with their professional translations into ten languages: Spanish, German, Greek, Russian, Turkish, Arabic, Vietnamese, Thai, Chinese, and Hindi.",
	"url": "https://github.com/google-deepmind/xquad",
	"hf_name": "google/xquad",
	"generative": false
	},
	"mkqa_tr": {
	"name": "MKQA",
	"task": "extractive_question_answering",
	"description": "MKQA: Multilingual Knowledge Questions & Answers. MKQA includes 10k open-domain question-answer pairs in 26 languages, resulting in 260k examples in total.",
	"url": "https://github.com/apple/ml-mkqa",
	"hf_name": "mcemilg/mkqa_tr",
	"generative": false
	},
	"xlsum_tr": {
	"name": "XLSum",
	"task": "summarization",
	"description": "Abstractive summarization dataset for 44 languages.",
	"url": "https://github.com/csebuetnlp/xl-sum",
	"hf_name": "csebuetnlp/xlsum",
	"generative": true
	},
	"mlsum_tr": {
	"name": "MLSum",
	"task": "summarization",
	"description": "A multilingual summarization dataset collected from the newspapers' websites. MLSum contains 1.5M examples in 5 languages including Turkish.",
	"url": "https://huggingface.co/datasets/reciTAL/mlsum",
	"hf_name": "reciTAL/mlsum",
	"generative": true
	},
	"wiki_lingua_tr": {
	"name": "WikiLingua",
	"task": "summarization",
	"description": "A multilingual abstractive summarization dataset covering 17 languages.",
	"url": "https://github.com/esdurmus/Wikilingua",
	"hf_name": "GEM/wiki_lingua",
	"generative": true
	},
	"tr-wikihow-summ": {
	"name": "WikiHowSumm",
	"task": "summarization",
	"description": "A summarization dataset obtained from WikiHow website.",
	"url": "https://huggingface.co/datasets/ardauzunoglu/tr-wikihow-summ",
	"hf_name": "ardauzunoglu/tr-wikihow-summ",
	"generative": true
	},
	"mnli_tr": {
	"name": "MNLI",
	"task": "natural_language_inference",
	"description": "Multi-Genre NLI (MNLI) dataset.",
	"url": "https://cims.nyu.edu/~sbowman/multinli/",
	"hf_name": "boun-tabi/nli_tr",
	"generative": false
	},
	"snli_tr": {
	"name": "SNLI",
	"task": "natural_language_inference",
	"description": "The Stanford NLI (SNLI) dataset.",
	"url": "https://nlp.stanford.edu/projects/snli/",
	"hf_name": "boun-tabi/nli_tr",
	"generative": false
	},
	"xnli_tr": {
	"name": "XNLI",
	"task": "natural_language_inference",
	"description": "The Cross-Lingual NLI (XNLI) dataset.",
	"url": "https://github.com/facebookresearch/XNLI",
	"hf_name": "boun-tabi/nli_tr",
	"generative": false
	},
	"xcopa_tr": {
	"name": "XCOPA",
	"task": "multiple_choice",
	"description": "A multilingual dataset for evaluating causal commonsense reasoning capabilities of language models.",
	"url": "https://github.com/cambridgeltl/xcopa",
	"hf_name": "cambridgeltl/xcopa",
	"generative": false
	},
	"exams_tr": {
	"name": "Exams",
	"task": "multiple_choice",
	"description": "A question answering dataset covering high school exams.",
	"url": "https://huggingface.co/datasets/exams",
	"hf_name": "exams",
	"generative": false
	},
	"belebele_tr": {
	"name": "Belebele",
	"task": "multiple_choice",
	"description": "A multiple choice question answering dataset to evaluate machine comprehension.",
	"url": "https://github.com/facebookresearch/belebele",
	"generative": false
	},
	"turkish_plu_goal_inference": {
	"name": "PLU-GI",
	"task": "multiple_choice",
	"description": "TurkishPLU - Goal Inference task.",
	"url": "https://github.com/GGLAB-KU/turkish-plu",
	"hf_name": "mcemilg/turkish-plu-goal-inference",
	"generative": false
	},
	"turkish_plu_next_event_prediction": {
	"name": "PLU-NE",
	"task": "multiple_choice",
	"description": "TurkishPLU - Next Event Prediction task.",
	"url": "https://github.com/GGLAB-KU/turkish-plu",
	"hf_name": "mcemilg/turkish-plu-next-event-prediction",
	"generative": false
	},
	"turkish_plu_step_inference": {
	"name": "PLU-SI",
	"task": "multiple_choice",
	"description": "TurkishPLU - Step Inference task.",
	"url": "https://github.com/GGLAB-KU/turkish-plu",
	"hf_name": "mcemilg/turkish-plu-step-inference",
	"generative": false
	},
	"turkish_plu_step_ordering": {
	"name": "PLU-SO",
	"task": "multiple_choice",
	"description": "TurkishPLU - Step Ordering task.",
	"url": "https://github.com/GGLAB-KU/turkish-plu",
	"hf_name": "mcemilg/turkish-plu-step-ordering",
	"generative": false
	},
	"sts_tr": {
	"name": "STS",
	"task": "text_classification",
	"description": "The machine-translated Semantic Textual Similarity dataset in Turkish.",
	"url": "https://github.com/emrecncelik/sts-benchmark-tr",
	"hf_name": "emrecan/stsb-mt-turkish",
	"generative": false
	},
	"offenseval_tr": {
	"name": "OffensEval",
	"task": "text_classification",
	"description": "A dataset for offensive language identification in Turkish.",
	"url": "https://sites.google.com/site/offensevalsharedtask/offenseval-2020",
	"hf_name": "coltekin/offenseval2020_tr",
	"generative": false
	},
	"news_cat": {
	"name": "NewsCat",
	"task": "text_classification",
	"description": "News classification dataset collected from Turkish newspaper websites.",
	"url": "http://www.kemik.yildiz.edu.tr/veri_kumelerimiz.html",
	"hf_name": "mcemilg/news-cat",
	"generative": false
	},
	"ironytr": {
	"name": "IronyTR",
	"task": "text_classification",
	"description": "Irony detection dataset in Turkish.",
	"url": "https://github.com/teghub/IronyTR",
	"hf_name": "mcemilg/IronyTR",
	"generative": false
	},
	"wmt-tr-en-prompt": {
	"name": "WMT",
	"task": "machine_translation",
	"description": "English-to-Turkish machine translation dataset.",
	"url": "http://www.aclweb.org/anthology/W/W16/W16-2301",
	"hf_name": "wmt/wmt16",
	"generative": true
	},
	"gecturk_generation": {
	"name": "GECTurk",
	"task": "grammatical_error_correction",
	"description": "A dataset for grammatical error correction.",
	"url": "https://github.com/GGLAB-KU/gecturk",
	"hf_name": "mcemilg/GECTurk-generation",
	"generative": true
	}
	}