manjugeorge commited on
Commit
320a3cf
1 Parent(s): 309c549

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +81 -75
vocab.json CHANGED
@@ -1,78 +1,84 @@
1
  {
2
- "'": 23,
3
- "[PAD]": 76,
4
- "[UNK]": 76,
5
- "|": 38,
6
- "": 31,
7
- "": 25,
8
- "": 20,
9
- "": 8,
10
- "": 61,
11
- "": 48,
12
- "": 73,
13
- "": 27,
14
- "": 68,
15
- "": 63,
16
- "": 64,
17
- "": 40,
18
- "": 22,
19
- "": 32,
20
- "": 59,
21
- "": 37,
22
- "": 60,
23
- "": 3,
24
- "": 65,
25
- "": 5,
26
- "": 11,
27
- "": 17,
28
- "": 58,
29
- "": 30,
30
- "": 1,
31
- "": 33,
32
- "": 0,
33
- "": 45,
34
- "": 44,
35
- "": 42,
36
- "": 16,
37
- "": 12,
38
- "": 66,
39
- "": 56,
 
 
 
 
 
 
40
  "ബ": 50,
41
- "ഭ": 9,
42
- "മ": 36,
43
- "യ": 13,
44
- "ര": 51,
45
- "റ": 26,
46
- "ല": 41,
47
- "ള": 47,
48
- "ഴ": 62,
49
- "വ": 29,
50
- "ശ": 52,
51
- "ഷ": 70,
52
- "സ": 54,
53
- "ഹ": 49,
54
- "ാ": 34,
55
- "ി": 21,
56
- "ീ": 4,
57
- "ു": 67,
58
- "ൂ": 57,
59
- "ൃ": 2,
60
- "െ": 43,
61
- "േ": 19,
62
- "ൈ": 39,
63
- "ൊ": 15,
64
- "ോ": 14,
65
- "ൌ": 53,
66
- "്": 18,
67
- "ൗ": 28,
68
- "ൺ": 35,
69
- "ൻ": 69,
70
- "ർ": 46,
71
- "ൽ": 6,
72
- "ൾ": 7,
73
- "ൿ": 24,
74
- "‘": 10,
75
- "’": 72,
76
- "“": 55,
77
- "”": 71
78
  }
 
1
  {
2
+ "!": 41,
3
+ "'": 2,
4
+ ",": 61,
5
+ "-": 20,
6
+ ".": 29,
7
+ ";": 5,
8
+ "?": 13,
9
+ "[PAD]": 81,
10
+ "[UNK]": 80,
11
+ "|": 26,
12
+ "": 23,
13
+ "": 40,
14
+ "": 42,
15
+ "": 63,
16
+ "": 35,
17
+ "": 72,
18
+ "": 57,
19
+ "": 19,
20
+ "": 55,
21
+ "": 36,
22
+ "": 77,
23
+ "": 46,
24
+ "": 43,
25
+ "": 45,
26
+ "": 34,
27
+ "": 7,
28
+ "": 32,
29
+ "": 38,
30
+ "": 11,
31
+ "": 59,
32
+ "": 1,
33
+ "": 67,
34
+ "": 33,
35
+ "": 22,
36
+ "": 14,
37
+ "": 4,
38
+ "": 58,
39
+ "": 47,
40
+ "ഥ": 8,
41
+ "ദ": 28,
42
+ "ധ": 65,
43
+ "ന": 64,
44
+ "പ": 53,
45
+ "ഫ": 52,
46
  "ബ": 50,
47
+ "ഭ": 0,
48
+ "മ": 24,
49
+ "യ": 16,
50
+ "ര": 74,
51
+ "റ": 70,
52
+ "ല": 27,
53
+ "ള": 49,
54
+ "ഴ": 10,
55
+ "വ": 21,
56
+ "ശ": 75,
57
+ "ഷ": 78,
58
+ "സ": 31,
59
+ "ഹ": 73,
60
+ "ാ": 6,
61
+ "ി": 44,
62
+ "ീ": 39,
63
+ "ു": 54,
64
+ "ൂ": 9,
65
+ "ൃ": 37,
66
+ "െ": 30,
67
+ "േ": 69,
68
+ "ൈ": 25,
69
+ "ൊ": 66,
70
+ "ോ": 17,
71
+ "ൌ": 76,
72
+ "്": 68,
73
+ "ൗ": 3,
74
+ "ൺ": 71,
75
+ "ൻ": 12,
76
+ "ർ": 48,
77
+ "ൽ": 60,
78
+ "ൾ": 62,
79
+ "ൿ": 18,
80
+ "‘": 15,
81
+ "’": 56,
82
+ "“": 51,
83
+ "”": 79
84
  }