xlmr-tatoeba / config.json
bigpang's picture
Upload XLMRobertaForSequenceClassification
b4f839c
raw history blame
No virus
8.57 kB
{
"_name_or_path": "model/xlm-roberta-base-finetuned-language-detection-tatoeba/checkpoint-32346/",
"architectures": [
"XLMRobertaForSequenceClassification"
],
"attention_probs_dropout_prob": 0.1,
"bos_token_id": 0,
"classifier_dropout": null,
"eos_token_id": 2,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 768,
"id2label": {
"0": "Ainu",
"1": "Algerian Arabic",
"2": "Asturian",
"3": "Awadhi",
"4": "Baluchi",
"5": "Banjar",
"6": "Bavarian",
"7": "Berber languages",
"8": "Brithenig",
"9": "Buriat",
"10": "Cantonese",
"11": "Cebuano",
"12": "Central Dusun",
"13": "Central Kanuri",
"14": "Choctaw",
"15": "Chukot",
"16": "Coastal Kadazan",
"17": "Congo Swahili",
"18": "Creek",
"19": "Crimean Turkish",
"20": "Eastern Mari",
"21": "Egyptian Arabic",
"22": "Emilian-Romagnol",
"23": "Evenki",
"24": "Gondi",
"25": "Guadeloupean Creole French",
"26": "Gulf Arabic",
"27": "Hawaiian",
"28": "Ho",
"29": "Hunsrik",
"30": "Iloko",
"31": "Iranian Persian",
"32": "Kabyle",
"33": "Kalmyk",
"34": "Karachay-Balkar",
"35": "Karelian",
"36": "Kashubian",
"37": "Khasi",
"38": "Khmer",
"39": "Kumyk",
"40": "Kven Finnish",
"41": "Kyrgyz",
"42": "Ladino",
"43": "Latgalian",
"44": "Lhunda",
"45": "Ligurian",
"46": "Lingua Franca Nova",
"47": "Low German",
"48": "Lower Sorbian",
"49": "Malay (individual language)",
"50": "Mikasuki",
"51": "Minangkabau",
"52": "Mingrelian",
"53": "Modern Greek",
"54": "Nahuatl languages",
"55": "Nepali (individual language)",
"56": "Nogai",
"57": "North Frisian",
"58": "North Levantine Arabic",
"59": "North Moluccan Malay",
"60": "Northern Kurdish",
"61": "Nuer",
"62": "Occitan",
"63": "Old Tupi",
"64": "Oriya",
"65": "Pampanga",
"66": "Pattani Malay",
"67": "Picard",
"68": "Piedmontese",
"69": "Punjabi",
"70": "Rohingya",
"71": "Romany",
"72": "Rusyn",
"73": "Sakha",
"74": "Santali",
"75": "Seraiki",
"76": "Sorani Kurdish",
"77": "Southern Kurdish",
"78": "Sranan Tongo",
"79": "Standard Latvian",
"80": "Standard Malay",
"81": "Standard Moroccan Tamazight",
"82": "Swabian",
"83": "Swahili",
"84": "Swiss German",
"85": "Tachawit",
"86": "Tachelhit",
"87": "Tarifit",
"88": "Tase Naga",
"89": "Tetum",
"90": "Tigr\u00e9",
"91": "Tok Pisin",
"92": "Udmurt",
"93": "Upper Sorbian",
"94": "Uyghur",
"95": "Venetian",
"96": "Waray-Waray",
"97": "Wayuu",
"98": "Wu Chinese",
"99": "Zamboanga Chavacano",
"100": "Zaza",
"101": "af",
"102": "am",
"103": "an",
"104": "ar",
"105": "as",
"106": "az",
"107": "ba",
"108": "be",
"109": "bg",
"110": "bn",
"111": "br",
"112": "bs",
"113": "ca",
"114": "ch",
"115": "cs",
"116": "cv",
"117": "cy",
"118": "da",
"119": "de",
"120": "el",
"121": "en",
"122": "es",
"123": "et",
"124": "eu",
"125": "fi",
"126": "fo",
"127": "fr",
"128": "fy",
"129": "ga",
"130": "gd",
"131": "gl",
"132": "gn",
"133": "gu",
"134": "ha",
"135": "he",
"136": "hi",
"137": "hr",
"138": "hu",
"139": "hy",
"140": "id",
"141": "is",
"142": "it",
"143": "ja",
"144": "jv",
"145": "ka",
"146": "kk",
"147": "kl",
"148": "kn",
"149": "ko",
"150": "kw",
"151": "lb",
"152": "ln",
"153": "lo",
"154": "lt",
"155": "mi",
"156": "mk",
"157": "ml",
"158": "mn",
"159": "mr",
"160": "mt",
"161": "my",
"162": "nb",
"163": "nl",
"164": "nn",
"165": "os",
"166": "pl",
"167": "pt",
"168": "qu",
"169": "rn",
"170": "ro",
"171": "ru",
"172": "se",
"173": "sk",
"174": "sl",
"175": "sq",
"176": "sr",
"177": "sv",
"178": "sw",
"179": "ta",
"180": "te",
"181": "th",
"182": "ti",
"183": "tk",
"184": "tl",
"185": "tr",
"186": "tt",
"187": "ty",
"188": "uk",
"189": "ur",
"190": "uz",
"191": "vi",
"192": "wo",
"193": "xh",
"194": "yi",
"195": "zh"
},
"initializer_range": 0.02,
"intermediate_size": 3072,
"label2id": {
"Ainu": 0,
"Algerian Arabic": 1,
"Asturian": 2,
"Awadhi": 3,
"Baluchi": 4,
"Banjar": 5,
"Bavarian": 6,
"Berber languages": 7,
"Brithenig": 8,
"Buriat": 9,
"Cantonese": 10,
"Cebuano": 11,
"Central Dusun": 12,
"Central Kanuri": 13,
"Choctaw": 14,
"Chukot": 15,
"Coastal Kadazan": 16,
"Congo Swahili": 17,
"Creek": 18,
"Crimean Turkish": 19,
"Eastern Mari": 20,
"Egyptian Arabic": 21,
"Emilian-Romagnol": 22,
"Evenki": 23,
"Gondi": 24,
"Guadeloupean Creole French": 25,
"Gulf Arabic": 26,
"Hawaiian": 27,
"Ho": 28,
"Hunsrik": 29,
"Iloko": 30,
"Iranian Persian": 31,
"Kabyle": 32,
"Kalmyk": 33,
"Karachay-Balkar": 34,
"Karelian": 35,
"Kashubian": 36,
"Khasi": 37,
"Khmer": 38,
"Kumyk": 39,
"Kven Finnish": 40,
"Kyrgyz": 41,
"Ladino": 42,
"Latgalian": 43,
"Lhunda": 44,
"Ligurian": 45,
"Lingua Franca Nova": 46,
"Low German": 47,
"Lower Sorbian": 48,
"Malay (individual language)": 49,
"Mikasuki": 50,
"Minangkabau": 51,
"Mingrelian": 52,
"Modern Greek": 53,
"Nahuatl languages": 54,
"Nepali (individual language)": 55,
"Nogai": 56,
"North Frisian": 57,
"North Levantine Arabic": 58,
"North Moluccan Malay": 59,
"Northern Kurdish": 60,
"Nuer": 61,
"Occitan": 62,
"Old Tupi": 63,
"Oriya": 64,
"Pampanga": 65,
"Pattani Malay": 66,
"Picard": 67,
"Piedmontese": 68,
"Punjabi": 69,
"Rohingya": 70,
"Romany": 71,
"Rusyn": 72,
"Sakha": 73,
"Santali": 74,
"Seraiki": 75,
"Sorani Kurdish": 76,
"Southern Kurdish": 77,
"Sranan Tongo": 78,
"Standard Latvian": 79,
"Standard Malay": 80,
"Standard Moroccan Tamazight": 81,
"Swabian": 82,
"Swahili": 83,
"Swiss German": 84,
"Tachawit": 85,
"Tachelhit": 86,
"Tarifit": 87,
"Tase Naga": 88,
"Tetum": 89,
"Tigr\u00e9": 90,
"Tok Pisin": 91,
"Udmurt": 92,
"Upper Sorbian": 93,
"Uyghur": 94,
"Venetian": 95,
"Waray-Waray": 96,
"Wayuu": 97,
"Wu Chinese": 98,
"Zamboanga Chavacano": 99,
"Zaza": 100,
"af": 101,
"am": 102,
"an": 103,
"ar": 104,
"as": 105,
"az": 106,
"ba": 107,
"be": 108,
"bg": 109,
"bn": 110,
"br": 111,
"bs": 112,
"ca": 113,
"ch": 114,
"cs": 115,
"cv": 116,
"cy": 117,
"da": 118,
"de": 119,
"el": 120,
"en": 121,
"es": 122,
"et": 123,
"eu": 124,
"fi": 125,
"fo": 126,
"fr": 127,
"fy": 128,
"ga": 129,
"gd": 130,
"gl": 131,
"gn": 132,
"gu": 133,
"ha": 134,
"he": 135,
"hi": 136,
"hr": 137,
"hu": 138,
"hy": 139,
"id": 140,
"is": 141,
"it": 142,
"ja": 143,
"jv": 144,
"ka": 145,
"kk": 146,
"kl": 147,
"kn": 148,
"ko": 149,
"kw": 150,
"lb": 151,
"ln": 152,
"lo": 153,
"lt": 154,
"mi": 155,
"mk": 156,
"ml": 157,
"mn": 158,
"mr": 159,
"mt": 160,
"my": 161,
"nb": 162,
"nl": 163,
"nn": 164,
"os": 165,
"pl": 166,
"pt": 167,
"qu": 168,
"rn": 169,
"ro": 170,
"ru": 171,
"se": 172,
"sk": 173,
"sl": 174,
"sq": 175,
"sr": 176,
"sv": 177,
"sw": 178,
"ta": 179,
"te": 180,
"th": 181,
"ti": 182,
"tk": 183,
"tl": 184,
"tr": 185,
"tt": 186,
"ty": 187,
"uk": 188,
"ur": 189,
"uz": 190,
"vi": 191,
"wo": 192,
"xh": 193,
"yi": 194,
"zh": 195
},
"layer_norm_eps": 1e-05,
"max_position_embeddings": 514,
"model_type": "xlm-roberta",
"num_attention_heads": 12,
"num_hidden_layers": 12,
"output_past": true,
"pad_token_id": 1,
"position_embedding_type": "absolute",
"problem_type": "single_label_classification",
"torch_dtype": "float32",
"transformers_version": "4.27.3",
"type_vocab_size": 1,
"use_cache": true,
"vocab_size": 250002
}