Tanel committed on
Commit
6faad9d
1 Parent(s): 0421b5c

Now uses language labels of the form `code: Language` (e.g. `en: English`)

Browse files
Files changed (1) hide show
  1. label_encoder.txt +109 -109
label_encoder.txt CHANGED
@@ -1,109 +1,109 @@
1
- 'ab' => 0
2
- 'af' => 1
3
- 'am' => 2
4
- 'ar' => 3
5
- 'as' => 4
6
- 'az' => 5
7
- 'ba' => 6
8
- 'be' => 7
9
- 'bg' => 8
10
- 'bn' => 9
11
- 'bo' => 10
12
- 'br' => 11
13
- 'bs' => 12
14
- 'ca' => 13
15
- 'ceb' => 14
16
- 'cs' => 15
17
- 'cy' => 16
18
- 'da' => 17
19
- 'de' => 18
20
- 'el' => 19
21
- 'en' => 20
22
- 'eo' => 21
23
- 'es' => 22
24
- 'et' => 23
25
- 'eu' => 24
26
- 'fa' => 25
27
- 'fi' => 26
28
- 'fo' => 27
29
- 'fr' => 28
30
- 'gl' => 29
31
- 'gn' => 30
32
- 'gu' => 31
33
- 'gv' => 32
34
- 'ha' => 33
35
- 'haw' => 34
36
- 'hi' => 35
37
- 'hr' => 36
38
- 'ht' => 37
39
- 'hu' => 38
40
- 'hy' => 39
41
- 'ia' => 40
42
- 'id' => 41
43
- 'is' => 42
44
- 'it' => 43
45
- 'iw' => 44
46
- 'ja' => 45
47
- 'jw' => 46
48
- 'ka' => 47
49
- 'kk' => 48
50
- 'km' => 49
51
- 'kn' => 50
52
- 'ko' => 51
53
- 'la' => 52
54
- 'lb' => 53
55
- 'ln' => 54
56
- 'lo' => 55
57
- 'lt' => 56
58
- 'lv' => 57
59
- 'mg' => 58
60
- 'mi' => 59
61
- 'mk' => 60
62
- 'ml' => 61
63
- 'mn' => 62
64
- 'mr' => 63
65
- 'ms' => 64
66
- 'mt' => 65
67
- 'my' => 66
68
- 'ne' => 67
69
- 'nl' => 68
70
- 'nn' => 69
71
- 'no' => 70
72
- 'oc' => 71
73
- 'pa' => 72
74
- 'pl' => 73
75
- 'ps' => 74
76
- 'pt' => 75
77
- 'ro' => 76
78
- 'ru' => 77
79
- 'sa' => 78
80
- 'sco' => 79
81
- 'sd' => 80
82
- 'si' => 81
83
- 'sk' => 82
84
- 'sl' => 83
85
- 'sn' => 84
86
- 'so' => 85
87
- 'sq' => 86
88
- 'sr' => 87
89
- 'su' => 88
90
- 'sv' => 89
91
- 'sw' => 90
92
- 'ta' => 91
93
- 'te' => 92
94
- 'tg' => 93
95
- 'th' => 94
96
- 'tk' => 95
97
- 'tl' => 96
98
- 'tr' => 97
99
- 'tt' => 98
100
- 'uk' => 99
101
- 'ur' => 100
102
- 'uz' => 101
103
- 'vi' => 102
104
- 'war' => 103
105
- 'yi' => 104
106
- 'yo' => 105
107
- 'zh' => 106
108
- ================
109
- 'starting_index' => 0
1
+ 'ab: Abkhazian' => 0
2
+ 'af: Afrikaans' => 1
3
+ 'am: Amharic' => 2
4
+ 'ar: Arabic' => 3
5
+ 'as: Assamese' => 4
6
+ 'az: Azerbaijani' => 5
7
+ 'ba: Bashkir' => 6
8
+ 'be: Belarusian' => 7
9
+ 'bg: Bulgarian' => 8
10
+ 'bn: Bengali' => 9
11
+ 'bo: Tibetan' => 10
12
+ 'br: Breton' => 11
13
+ 'bs: Bosnian' => 12
14
+ 'ca: Catalan' => 13
15
+ 'ceb: Cebuano' => 14
16
+ 'cs: Czech' => 15
17
+ 'cy: Welsh' => 16
18
+ 'da: Danish' => 17
19
+ 'de: German' => 18
20
+ 'el: Greek' => 19
21
+ 'en: English' => 20
22
+ 'eo: Esperanto' => 21
23
+ 'es: Spanish' => 22
24
+ 'et: Estonian' => 23
25
+ 'eu: Basque' => 24
26
+ 'fa: Persian' => 25
27
+ 'fi: Finnish' => 26
28
+ 'fo: Faroese' => 27
29
+ 'fr: French' => 28
30
+ 'gl: Galician' => 29
31
+ 'gn: Guarani' => 30
32
+ 'gu: Gujarati' => 31
33
+ 'gv: Manx' => 32
34
+ 'ha: Hausa' => 33
35
+ 'haw: Hawaiian' => 34
36
+ 'hi: Hindi' => 35
37
+ 'hr: Croatian' => 36
38
+ 'ht: Haitian' => 37
39
+ 'hu: Hungarian' => 38
40
+ 'hy: Armenian' => 39
41
+ 'ia: Interlingua' => 40
42
+ 'id: Indonesian' => 41
43
+ 'is: Icelandic' => 42
44
+ 'it: Italian' => 43
45
+ 'iw: Hebrew' => 44
46
+ 'ja: Japanese' => 45
47
+ 'jw: Javanese' => 46
48
+ 'ka: Georgian' => 47
49
+ 'kk: Kazakh' => 48
50
+ 'km: Central Khmer' => 49
51
+ 'kn: Kannada' => 50
52
+ 'ko: Korean' => 51
53
+ 'la: Latin' => 52
54
+ 'lb: Luxembourgish' => 53
55
+ 'ln: Lingala' => 54
56
+ 'lo: Lao' => 55
57
+ 'lt: Lithuanian' => 56
58
+ 'lv: Latvian' => 57
59
+ 'mg: Malagasy' => 58
60
+ 'mi: Maori' => 59
61
+ 'mk: Macedonian' => 60
62
+ 'ml: Malayalam' => 61
63
+ 'mn: Mongolian' => 62
64
+ 'mr: Marathi' => 63
65
+ 'ms: Malay' => 64
66
+ 'mt: Maltese' => 65
67
+ 'my: Burmese' => 66
68
+ 'ne: Nepali' => 67
69
+ 'nl: Dutch' => 68
70
+ 'nn: Norwegian Nynorsk' => 69
71
+ 'no: Norwegian' => 70
72
+ 'oc: Occitan' => 71
73
+ 'pa: Panjabi' => 72
74
+ 'pl: Polish' => 73
75
+ 'ps: Pushto' => 74
76
+ 'pt: Portuguese' => 75
77
+ 'ro: Romanian' => 76
78
+ 'ru: Russian' => 77
79
+ 'sa: Sanskrit' => 78
80
+ 'sco: Scots' => 79
81
+ 'sd: Sindhi' => 80
82
+ 'si: Sinhala' => 81
83
+ 'sk: Slovak' => 82
84
+ 'sl: Slovenian' => 83
85
+ 'sn: Shona' => 84
86
+ 'so: Somali' => 85
87
+ 'sq: Albanian' => 86
88
+ 'sr: Serbian' => 87
89
+ 'su: Sundanese' => 88
90
+ 'sv: Swedish' => 89
91
+ 'sw: Swahili' => 90
92
+ 'ta: Tamil' => 91
93
+ 'te: Telugu' => 92
94
+ 'tg: Tajik' => 93
95
+ 'th: Thai' => 94
96
+ 'tk: Turkmen' => 95
97
+ 'tl: Tagalog' => 96
98
+ 'tr: Turkish' => 97
99
+ 'tt: Tatar' => 98
100
+ 'uk: Ukrainian' => 99
101
+ 'ur: Urdu' => 100
102
+ 'uz: Uzbek' => 101
103
+ 'vi: Vietnamese' => 102
104
+ 'war: Waray' => 103
105
+ 'yi: Yiddish' => 104
106
+ 'yo: Yoruba' => 105
107
+ 'zh: Chinese' => 106
108
+ ================
109
+ 'starting_index' => 0