manjugeorge committed on
Commit
0772aeb
1 Parent(s): e1e9591

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +75 -81
vocab.json CHANGED
@@ -1,84 +1,78 @@
1
  {
2
- "!": 41,
3
- "'": 2,
4
- ",": 61,
5
- "-": 20,
6
- ".": 29,
7
- ";": 5,
8
- "?": 13,
9
- "[PAD]": 81,
10
- "[UNK]": 80,
11
- "|": 26,
12
- "": 23,
13
- "": 40,
14
- "": 42,
15
- "": 63,
16
- "": 35,
17
- "": 72,
18
- "": 57,
19
- "": 19,
20
- "": 55,
21
- "": 36,
22
- "": 77,
23
- "": 46,
24
- "": 43,
25
- "": 45,
26
- "": 34,
27
- "": 7,
28
- "": 32,
29
- "": 38,
30
- "": 11,
31
- "": 59,
32
- "": 1,
33
- "": 67,
34
- "": 33,
35
- "": 22,
36
- "": 14,
37
- "": 4,
38
- "": 58,
39
- "": 47,
40
- "ഥ": 8,
41
- "ദ": 28,
42
- "ധ": 65,
43
- "ന": 64,
44
- "പ": 53,
45
- "ഫ": 52,
46
  "ബ": 50,
47
- "ഭ": 0,
48
- "മ": 24,
49
- "യ": 16,
50
- "ര": 74,
51
- "റ": 70,
52
- "ല": 27,
53
- "ള": 49,
54
- "ഴ": 10,
55
- "വ": 21,
56
- "ശ": 75,
57
- "ഷ": 78,
58
- "സ": 31,
59
- "ഹ": 73,
60
- "ാ": 6,
61
- "ി": 44,
62
- "ീ": 39,
63
- "ു": 54,
64
- "ൂ": 9,
65
- "ൃ": 37,
66
- "െ": 30,
67
- "േ": 69,
68
- "ൈ": 25,
69
- "ൊ": 66,
70
- "ോ": 17,
71
- "ൌ": 76,
72
- "്": 68,
73
- "ൗ": 3,
74
- "ൺ": 71,
75
- "ൻ": 12,
76
- "ർ": 48,
77
- "ൽ": 60,
78
- "ൾ": 62,
79
- "ൿ": 18,
80
- "‘": 15,
81
- "’": 56,
82
- "“": 51,
83
- "”": 79
84
  }
 
1
  {
2
+ " ": 88,
3
+ "'": 23,
4
+ "[PAD]": 75,
5
+ "[UNK]": 74,
6
+ "": 31,
7
+ "": 25,
8
+ "": 20,
9
+ "": 8,
10
+ "": 61,
11
+ "": 48,
12
+ "": 73,
13
+ "": 27,
14
+ "": 68,
15
+ "": 63,
16
+ "": 64,
17
+ "": 40,
18
+ "": 22,
19
+ "": 32,
20
+ "": 59,
21
+ "": 37,
22
+ "": 60,
23
+ "": 3,
24
+ "": 65,
25
+ "": 5,
26
+ "": 11,
27
+ "": 17,
28
+ "": 58,
29
+ "": 30,
30
+ "": 1,
31
+ "": 33,
32
+ "": 0,
33
+ "": 45,
34
+ "": 44,
35
+ "": 42,
36
+ "": 16,
37
+ "": 12,
38
+ "": 66,
39
+ "": 56,
 
 
 
 
 
 
40
  "ബ": 50,
41
+ "ഭ": 9,
42
+ "മ": 36,
43
+ "യ": 13,
44
+ "ര": 51,
45
+ "റ": 26,
46
+ "ല": 41,
47
+ "ള": 47,
48
+ "ഴ": 62,
49
+ "വ": 29,
50
+ "ശ": 52,
51
+ "ഷ": 70,
52
+ "സ": 54,
53
+ "ഹ": 49,
54
+ "ാ": 34,
55
+ "ി": 21,
56
+ "ീ": 4,
57
+ "ു": 67,
58
+ "ൂ": 57,
59
+ "ൃ": 2,
60
+ "െ": 43,
61
+ "േ": 19,
62
+ "ൈ": 39,
63
+ "ൊ": 15,
64
+ "ോ": 14,
65
+ "ൌ": 53,
66
+ "്": 18,
67
+ "ൗ": 28,
68
+ "ൺ": 35,
69
+ "ൻ": 69,
70
+ "ർ": 46,
71
+ "ൽ": 6,
72
+ "ൾ": 7,
73
+ "ൿ": 24,
74
+ "‘": 10,
75
+ "’": 72,
76
+ "“": 55,
77
+ "”": 71
78
  }