datasets-tagging / tag_set.json
Yacine Jernite
initial_commit
be293db
raw
history blame
No virus
11.3 kB
{"task_structure": {"Txt2Class": "text to classification task", "Txt2Class.Bi": "text to binary classification task", "Txt2Class.Multi.Sing": "text to multiple classes single label", "Txt2Class.Multi.Multi": "text to multiple classes multiple labels", "Strct2Txt": "structured information to text task", "Txt2Strct": "text to structured information task", "Txt2Txt": "text to text task", "Txt": "just text", "Oth": "other"}, "purpose": {"NLI": "natural language inference", "SentA": "sentiment analysis", "MT": "machine translation", "Summ.ext": "extractive summarization", "Summ.abs": "abstractive summarization", "QA.abs": "abstractive question answering", "QA.ext": "extractive question answering", "QA.open": "open domain question answering", "QA.closed": "closed domain question answering", "QA.open.abs": "open domain abstractive question answering", "QA.closed.abs": "closed domain abstractive question answering", "QA.open.ext": "open domain extractive question answering", "QA.closed.ext": "closed domain extractive question answering", "Dialog": "dialogue or multi-turn text", "LM": "language modeling", "NER": "named entity recognition", "Pars": "parsing", "TxtSimp": "text simplification", "Coref": "coreference resolution", "FactChk": "fact checking", "EntLink": "entity linking", "SSplitFus": "sentence splitting/fusion", "SlotFillClz": "slot filling / Cloze test", "InfoRet": "information retrieval", "IntentClass": "intent classification", "SemSim": "semantic similarity", "Oth": "other"}, "language_producers": {"crwdsrc_l": "data produced by crowdsource workers", "machgen_l": "machine-generated data", "found_l": "found data", "Oth": "other"}, "annotation": {"crwdsrc_a": "annotation produced by crowdsource workers", "machgen_a": "machine-generated annotation", "exp_a": "expert annotation", "no_a": "no annotation", "Oth": "other"}, "license": {"afl-3.0": "Academic Free License", "apache-2.0": "Apache license 2.0", "artistic-2.0": "Artistic license 2.0", "bsl-1.0": "Boost 
Software License 1.0", "bsd-2-clause": "BSD 2-clause \"Simplified\" license", "bsd-3-clause": "BSD 3-clause \"New\" or \"Revised\" license", "bsd-3-clause-clear": "BSD 3-clause Clear license", "cc": "Creative Commons license family", "cc0-1.0": "Creative Commons Zero v1.0 Universal", "cc-by-4.0": "Creative Commons Attribution 4.0", "cc-by-sa-4.0": "Creative Commons Attribution Share Alike 4.0", "wtfpl": "Do What The F*ck You Want To Public License", "ecl-2.0": "Educational Community License v2.0", "epl-1.0": "Eclipse Public License 1.0", "epl-2.0": "Eclipse Public License 2.0", "eupl-1.1": "European Union Public License 1.1", "agpl-3.0": "GNU Affero General Public License v3.0", "gpl": "GNU General Public License family", "gpl-2.0": "GNU General Public License v2.0", "gpl-3.0": "GNU General Public License v3.0", "lgpl": "GNU Lesser General Public License family", "lgpl-2.1": "GNU Lesser General Public License v2.1", "lgpl-3.0": "GNU Lesser General Public License v3.0", "isc": "ISC", "lppl-1.3c": "LaTeX Project Public License v1.3c", "ms-pl": "Microsoft Public License", "mit": "MIT", "mpl-2.0": "Mozilla Public License 2.0", "osl-3.0": "Open Software License 3.0", "postgresql": "PostgreSQL License", "ofl-1.1": "SIL Open Font License 1.1", "ncsa": "University of Illinois/NCSA Open Source License", "unlicense": "The Unlicense", "zlib": "zLib License", "Oth": "other"}, "language": {"cardinality": {"1ling": "monolingual; only one language in the dataset", "trsl": "translation; parallel language use", "multiling": "multilingual; more than one language being used within or across datasets over different content", "Oth": "other"}, "BCP-47": {"en": "English, dialect unknown", "es": "Spanish, dialect unknown", "fr": "French, dialect unknown", "sv": "Swedish, dialect unknown", "fi": "Finnish, dialect unknown", "de": "German, dialect unknown", "ru": "Russian, dialect unknown", "uk": "Ukrainian, dialect unknown", "it": "Italian, dialect unknown", "eo": "Esperanto, dialect 
unknown", "ar": "Arabic, dialect unknown", "tr": "Turkish, dialect unknown", "bg": "Bulgarian, dialect unknown", "pl": "Polish, dialect unknown", "nl": "Dutch, dialect unknown", "id": "Indonesian, dialect unknown", "zh": "Chinese, dialect unknown", "af": "Afrikaans, dialect unknown", "ca": "Catalan, dialect unknown", "cs": "Czech, dialect unknown", "pt": "Portuguese, dialect unknown", "no": "Norwegian, dialect unknown", "he": "Hebrew, dialect unknown", "da": "Danish, dialect unknown", "is": "Icelandic, dialect unknown", "hu": "Hungarian, dialect unknown", "ro": "Romanian, dialect unknown", "ms": "Malay, dialect unknown", "ja": "Japanese, dialect unknown", "hi": "Hindi, dialect unknown", "sl": "Slovene, dialect unknown", "lt": "Lithuanian, dialect unknown", "ht": "Haitian, dialect unknown", "vi": "Vietnamese, dialect unknown", "et": "Estonian, dialect unknown", "el": "Greek, dialect unknown", "hr": "Croatian, dialect unknown", "mt": "Maltese, dialect unknown", "ts": "Tsonga, dialect unknown", "mk": "Macedonian, dialect unknown", "ln": "Lingala, dialect unknown", "ig": "Igbo, dialect unknown", "ee": "Ewe, dialect unknown", "xh": "Xhosa, dialect unknown", "sn": "Shona, dialect unknown", "rw": "Kinyarwanda, dialect unknown", "ny": "Chichewa, dialect unknown", "lv": "Latvian, dialect unknown", "lg": "Ganda, dialect unknown", "ko": "Korean, dialect unknown", "gl": "Galician, dialect unknown", "sg": "Sango, dialect unknown", "yo": "Yoruba, dialect unknown", "ur": "Urdu, dialect unknown", "rn": "Kirundi, dialect unknown", "mr": "Marathi, dialect unknown", "bn": "Bengali, dialect unknown", "nso": "Pedi, dialect unknown", "ty": "Tahitian, dialect unknown", "to": "Tonga, dialect unknown", "gu": "Gujarati, dialect unknown", "eu": "Basque, dialect unknown", "niu": "Niuean, dialect unknown", "guw": "Gun, dialect unknown", "gaa": "Ga, dialect unknown", "crs": "Seselwa Creole French, dialect unknown", "bcl": "Central Bikol, dialect unknown", "tn": "Tswana, dialect unknown", "sm": 
"Samoan, dialect unknown", "si": "Sinhala, dialect unknown", "nn": "Norwegian Nynorsk, dialect unknown", "nb": "Norwegian Bokm\u00e5l, dialect unknown", "fj": "Fijian, dialect unknown", "be": "Belarusian, dialect unknown", "pon": "Pohnpeian, dialect unknown", "pis": "Pijin, dialect unknown", "pap": "Papiamento, dialect unknown", "pag": "Pangasinan, dialect unknown", "lua": "Luba-Lulua, dialect unknown", "iso": "Isoko, dialect unknown", "ilo": "Iloko, dialect unknown", "gil": "Gilbertese, dialect unknown", "efi": "Efik, dialect unknown", "bzs": "Brazilian Sign Language, dialect unknown", "yi": "Yiddish, dialect unknown", "wa": "Walloon, dialect unknown", "sq": "Albanian, dialect unknown", "or": "Oriya, dialect unknown", "mh": "Marshallese, dialect unknown", "lb": "Luxembourgish, dialect unknown", "ha": "Hausa, dialect unknown", "fy": "Western Frisian, dialect unknown", "fo": "Faroese, dialect unknown", "as": "Assamese, dialect unknown", "tvl": "Tuvaluan, dialect unknown", "tll": "Tetela, dialect unknown", "swc": "Congo Swahili, dialect unknown", "lus": "Lushai, dialect unknown", "loz": "Lozi, dialect unknown", "ceb": "Cebuano, dialect unknown", "ti": "Tigrinya, dialect unknown", "st": "Southern Sotho, dialect unknown", "rm": "Romansh, dialect unknown", "oc": "Occitan, dialect unknown", "kg": "Kongo, dialect unknown", "ga": "Irish, dialect unknown", "co": "Corsican, dialect unknown", "an": "Aragonese, dialect unknown", "war": "Waray, dialect unknown", "lue": "Luvale, dialect unknown", "hil": "Hiligaynon, dialect unknown", "bem": "Bemba, dialect unknown", "ase": "American Sign Language, dialect unknown", "zu": "Zulu, dialect unknown", "tw": "Twi, dialect unknown", "tl": "Tagalog, dialect unknown", "sk": "Slovak, dialect unknown", "lu": "Luba-Katanga, dialect unknown", "hy": "Armenian, dialect unknown", "gv": "Manx, dialect unknown", "cy": "Welsh, dialect unknown", "bi": "Bislama, dialect unknown", "am": "Amharic, dialect unknown", "srn": "Sranan Tongo, dialect 
unknown", "toi": "Tonga (Zambia), dialect unknown", "kqn": "Kaonde, dialect unknown", "se": "Northern Sami, dialect unknown", "ps": "Pashto, dialect unknown", "os": "Ossetian, dialect unknown", "zne": "Zande (individual language), dialect unknown", "wls": "Wallisian, dialect unknown", "tpi": "Tok Pisin, dialect unknown", "tiv": "Tiv, dialect unknown", "run": "Rundi, dialect unknown", "so": "Somali, dialect unknown", "kw": "Cornish, dialect unknown", "ho": "Hiri Motu, dialect unknown", "gd": "Scottish Gaelic, dialect unknown", "br": "Breton, dialect unknown", "tum": "Tumbuka, dialect unknown", "yap": "Yapese, dialect unknown", "rnd": "Ruund, dialect unknown", "mfe": "Morisyen, dialect unknown", "kwy": "San Salvador Kongo, dialect unknown", "chk": "Chuukese, dialect unknown", "ber": "Berber languages, dialect unknown", "wo": "Wolof, dialect unknown", "ve": "Venda, dialect unknown", "th": "Thai, dialect unknown", "sc": "Sardinian, dialect unknown", "ml": "Malayalam, dialect unknown", "mg": "Malagasy, dialect unknown", "km": "Khmer, dialect unknown", "ka": "Georgian, dialect unknown", "mos": "Mossi, dialect unknown", "ta": "Tamil, dialect unknown", "mn": "Mongolian, dialect unknown", "kn": "Kannada, dialect unknown", "az": "Azerbaijani, dialect unknown", "roa": "Romance languages, dialect unknown", "yue": "Yue Chinese, dialect unknown", "tt": "Tatar, dialect unknown", "tk": "Turkmen, dialect unknown", "te": "Telugu, dialect unknown", "na": "Nauru, dialect unknown", "mi": "M\u0101ori, dialect unknown", "cv": "Chuvash, dialect unknown", "ba": "Bashkir, dialect unknown", "cel": "Celtic languages, dialect unknown", "umb": "Umbundu, dialect unknown", "sa": "Sanskrit, dialect unknown", "my": "Burmese, dialect unknown", "lo": "Lao, dialect unknown", "kl": "Kalaallisut, dialect unknown", "io": "Ido, dialect unknown", "ce": "Chechen, dialect unknown", "ab": "Abkhaz, dialect unknown", "fse": "Finnish Sign Language, dialect unknown", "zai": "Isthmus Zapotec, dialect unknown", 
"tzo": "Tzotzil, dialect unknown", "prl": "Peruvian Sign Language, dialect unknown", "mfs": "Mexican Sign Language, dialect unknown", "nyk": "Nyaneka, dialect unknown", "luo": "Luo, dialect unknown", "lun": "Lunda, dialect unknown", "kwn": "Kwangali, dialect unknown", "csn": "Colombian Sign Language, dialect unknown", "csg": "Chilean Sign Language, dialect unknown", "aed": "Argentine Sign Language, dialect unknown", "sw": "Swahili, dialect unknown", "su": "Sundanese, dialect unknown", "ss": "Swati, dialect unknown", "om": "Oromo, dialect unknown", "nv": "Navajo, dialect unknown", "ng": "Ndonga, dialect unknown", "ne": "Nepali, dialect unknown", "kj": "Kwanyama, dialect unknown", "jv": "Javanese, dialect unknown", "gn": "Guaran\u00ed, dialect unknown", "fa": "Persian, dialect unknown", "ch": "Chamorro, dialect unknown", "bo": "Tibetan Standard, dialect unknown", "wal": "Wolaitta, dialect unknown", "vsl": "Venezuelan Sign Language, dialect unknown", "ssp": "Spanish Sign Language, dialect unknown", "kab": "Kabyle, dialect unknown", "yua": "Yucateco, dialect unknown", "tdt": "Tetun Dili, dialect unknown", "pa": "Punjabi, dialect unknown", "nr": "Southern Ndebele, dialect unknown", "kk": "Kazakh, dialect unknown", "dv": "Divehi, dialect unknown", "Oth": "other"}}}