Bluecast committed on
Commit
ded9b9b
1 Parent(s): 1a87f8d

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +70 -70
vocab.json CHANGED
@@ -1,76 +1,76 @@
1
  {
2
- "'": 59,
3
  "[PAD]": 73,
4
  "[UNK]": 72,
5
- "|": 25,
6
- "ം": 39,
7
- "ഃ": 57,
8
- "അ": 67,
9
- "ആ": 18,
10
- "ഇ": 3,
11
- "ഈ": 48,
12
- "ഉ": 46,
13
- "ഊ": 28,
14
- "എ": 53,
15
- "ഏ": 43,
16
- "ഐ": 47,
17
- "ഒ": 58,
18
- "ഓ": 10,
19
- "ഔ": 62,
20
- "ക": 1,
21
- "ഖ": 19,
22
- "ഗ": 69,
23
- "ഘ": 50,
24
- "ങ": 64,
25
- "ച": 12,
26
- "ഛ": 6,
27
- "ജ": 14,
28
- "ഞ": 24,
29
- "ട": 7,
30
- "ഠ": 49,
31
- "ഡ": 13,
32
- "ഢ": 41,
33
- "ണ": 29,
34
- "ത": 70,
35
- "ഥ": 4,
36
- "ദ": 21,
37
- "ധ": 9,
38
- "ന": 45,
39
- "പ": 31,
40
- "ഫ": 17,
41
- "ബ": 51,
42
- "ഭ": 15,
43
- "മ": 23,
44
- "യ": 22,
45
- "ര": 26,
46
- "റ": 42,
47
- "ല": 37,
48
- "ള": 16,
49
- "ഴ": 35,
50
- "വ": 34,
51
- "ശ": 8,
52
  "ഷ": 63,
53
- "സ": 61,
54
- "ഹ": 52,
55
- "ാ": 38,
56
- "ി": 65,
57
- "ീ": 40,
58
- "ു": 11,
59
- "ൂ": 68,
60
- "ൃ": 36,
61
- "െ": 44,
62
- "േ": 0,
63
- "ൈ": 54,
64
- "ൊ": 32,
65
- "ോ": 66,
66
  "ൌ": 56,
67
- "്": 33,
68
- "ൗ": 71,
69
- "ൺ": 5,
70
- "ൻ": 55,
71
- "ർ": 2,
72
- "ൽ": 27,
73
- "ൾ": 60,
74
- "ൿ": 20,
75
- "’": 30
76
  }
 
1
  {
2
+ "'": 69,
3
  "[PAD]": 73,
4
  "[UNK]": 72,
5
+ "|": 31,
6
+ "ം": 25,
7
+ "ഃ": 6,
8
+ "അ": 55,
9
+ "ആ": 11,
10
+ "ഇ": 27,
11
+ "ഈ": 60,
12
+ "ഉ": 68,
13
+ "ഊ": 43,
14
+ "എ": 38,
15
+ "ഏ": 71,
16
+ "ഐ": 46,
17
+ "ഒ": 18,
18
+ "ഓ": 47,
19
+ "ഔ": 51,
20
+ "ക": 64,
21
+ "ഖ": 44,
22
+ "ഗ": 9,
23
+ "ഘ": 52,
24
+ "ങ": 61,
25
+ "ച": 19,
26
+ "ഛ": 7,
27
+ "ജ": 32,
28
+ "ഞ": 62,
29
+ "ട": 29,
30
+ "ഠ": 53,
31
+ "ഡ": 42,
32
+ "ഢ": 23,
33
+ "ണ": 67,
34
+ "ത": 8,
35
+ "ഥ": 24,
36
+ "ദ": 16,
37
+ "ധ": 48,
38
+ "ന": 36,
39
+ "പ": 28,
40
+ "ഫ": 5,
41
+ "ബ": 35,
42
+ "ഭ": 4,
43
+ "മ": 65,
44
+ "യ": 26,
45
+ "ര": 30,
46
+ "റ": 13,
47
+ "ല": 22,
48
+ "ള": 17,
49
+ "ഴ": 3,
50
+ "വ": 21,
51
+ "ശ": 10,
52
  "ഷ": 63,
53
+ "സ": 54,
54
+ "ഹ": 15,
55
+ "ാ": 40,
56
+ "ി": 0,
57
+ "ീ": 34,
58
+ "ു": 59,
59
+ "ൂ": 57,
60
+ "ൃ": 70,
61
+ "െ": 1,
62
+ "േ": 33,
63
+ "ൈ": 41,
64
+ "ൊ": 58,
65
+ "ോ": 12,
66
  "ൌ": 56,
67
+ "്": 14,
68
+ "ൗ": 49,
69
+ "ൺ": 39,
70
+ "ൻ": 45,
71
+ "ർ": 50,
72
+ "ൽ": 2,
73
+ "ൾ": 66,
74
+ "ൿ": 37,
75
+ "’": 20
76
  }