manjugeorge commited on
Commit
c58ddd3
1 Parent(s): 4aa8eb2

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +73 -73
vocab.json CHANGED
@@ -1,78 +1,78 @@
1
  {
2
- "'": 6,
3
  "[PAD]": 75,
4
  "[UNK]": 74,
5
- "|": 39,
6
- "ം": 27,
7
- "ഃ": 1,
8
- "അ": 32,
9
- "ആ": 71,
10
- "ഇ": 16,
11
- "ഈ": 10,
12
- "ഉ": 21,
13
- "ഊ": 30,
14
- "എ": 43,
15
- "ഏ": 58,
16
- "ഐ": 2,
17
- "ഒ": 28,
18
- "ഓ": 59,
19
- "ക": 12,
20
- "ഖ": 63,
21
- "ഗ": 69,
22
- "ഘ": 65,
23
- "ങ": 17,
24
- "ച": 51,
25
- "ഛ": 49,
26
- "ജ": 62,
27
- "ഞ": 18,
28
- "ട": 56,
29
- "ഠ": 26,
30
- "ഡ": 60,
31
- "ഢ": 22,
32
- "ണ": 38,
33
- "ത": 29,
34
- "ഥ": 19,
35
- "ദ": 25,
36
- "ധ": 7,
37
- "ന": 33,
38
- "പ": 50,
39
- "ഫ": 23,
40
- "ബ": 48,
41
- "ഭ": 8,
42
- "മ": 34,
43
- "യ": 31,
44
- "ര": 67,
45
- "റ": 57,
46
- "ല": 3,
47
- "ള": 5,
48
- "ഴ": 42,
49
- "വ": 4,
50
- "ശ": 24,
51
  "ഷ": 9,
52
- "സ": 64,
53
- "ഹ": 37,
54
- "ാ": 20,
55
- "ി": 61,
56
- "ീ": 47,
57
- "ു": 46,
58
- "ൂ": 44,
59
- "ൃ": 15,
60
- "െ": 35,
61
- "േ": 73,
62
- "ൈ": 13,
63
- "ൊ": 68,
64
- "ോ": 66,
65
- "ൌ": 72,
66
- "്": 11,
67
- "ൗ": 52,
68
- "ൺ": 70,
69
- "ൻ": 54,
70
- "ർ": 41,
71
- "ൽ": 53,
72
- "ൾ": 0,
73
- "ൿ": 36,
74
- "‘": 55,
75
- "’": 40,
76
- "“": 14,
77
- "”": 45
78
  }
 
1
  {
2
+ "'": 38,
3
  "[PAD]": 75,
4
  "[UNK]": 74,
5
+ "|": 20,
6
+ "ം": 12,
7
+ "ഃ": 56,
8
+ "അ": 68,
9
+ "ആ": 43,
10
+ "ഇ": 25,
11
+ "ഈ": 13,
12
+ "ഉ": 19,
13
+ "ഊ": 71,
14
+ "എ": 21,
15
+ "ഏ": 60,
16
+ "ഐ": 10,
17
+ "ഒ": 51,
18
+ "ഓ": 70,
19
+ "ക": 22,
20
+ "ഖ": 15,
21
+ "ഗ": 52,
22
+ "ഘ": 7,
23
+ "ങ": 50,
24
+ "ച": 35,
25
+ "ഛ": 40,
26
+ "ജ": 39,
27
+ "ഞ": 46,
28
+ "ട": 5,
29
+ "ഠ": 72,
30
+ "ഡ": 53,
31
+ "ഢ": 62,
32
+ "ണ": 66,
33
+ "ത": 42,
34
+ "ഥ": 32,
35
+ "ദ": 37,
36
+ "ധ": 24,
37
+ "ന": 27,
38
+ "പ": 69,
39
+ "ഫ": 11,
40
+ "ബ": 23,
41
+ "ഭ": 0,
42
+ "മ": 3,
43
+ "യ": 48,
44
+ "ര": 64,
45
+ "റ": 4,
46
+ "ല": 49,
47
+ "ള": 17,
48
+ "ഴ": 6,
49
+ "വ": 29,
50
+ "ശ": 30,
51
  "ഷ": 9,
52
+ "സ": 61,
53
+ "ഹ": 59,
54
+ "ാ": 33,
55
+ "ി": 47,
56
+ "ീ": 65,
57
+ "ു": 34,
58
+ "ൂ": 18,
59
+ "ൃ": 2,
60
+ "െ": 1,
61
+ "േ": 8,
62
+ "ൈ": 63,
63
+ "ൊ": 54,
64
+ "ോ": 44,
65
+ "ൌ": 58,
66
+ "്": 41,
67
+ "ൗ": 67,
68
+ "ൺ": 31,
69
+ "ൻ": 73,
70
+ "ർ": 45,
71
+ "ൽ": 16,
72
+ "ൾ": 28,
73
+ "ൿ": 57,
74
+ "‘": 14,
75
+ "’": 36,
76
+ "“": 26,
77
+ "”": 55
78
  }