auditi41 committed on
Commit
2c993e6
1 Parent(s): 982b9ba

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +76 -76
vocab.json CHANGED
@@ -1,82 +1,82 @@
1
  {
2
- "'": 0,
3
- "/": 44,
4
  "[PAD]": 79,
5
  "[UNK]": 78,
6
- "a": 11,
7
- "e": 3,
8
- "g": 37,
9
- "l": 68,
10
- "o": 54,
11
- "p": 34,
12
- "y": 17,
13
- "|": 33,
14
  "।": 72,
15
- "॥": 35,
16
- "ঁ": 53,
17
- "ং": 38,
18
- "ঃ": 18,
19
- "অ": 46,
20
- "আ": 71,
21
- "ই": 57,
22
  "ঈ": 45,
23
- "উ": 29,
24
- "ঊ": 67,
25
- "ঋ": 62,
26
- "এ": 26,
27
- "ঐ": 70,
28
- "ও": 41,
29
- "ঔ": 15,
30
- "ক": 1,
31
- "খ": 10,
32
- "গ": 49,
33
- "ঘ": 63,
34
- "ঙ": 13,
35
- "চ": 76,
36
- "ছ": 66,
37
- "জ": 12,
38
- "ঝ": 43,
39
- "ঞ": 40,
40
- "ট": 77,
41
- "ঠ": 2,
42
- "ড": 55,
43
- "ঢ": 14,
44
- "ণ": 73,
45
- "ত": 31,
46
- "থ": 6,
47
- "দ": 27,
48
- "ধ": 50,
49
- "ন": 23,
50
- "প": 9,
51
- "ফ": 47,
52
- "ব": 32,
53
- "ভ": 28,
54
- "ম": 65,
55
- "য": 61,
56
- "র": 52,
57
- "ল": 19,
58
- "শ": 48,
59
- "ষ": 7,
60
- "স": 30,
61
- "হ": 60,
62
- "়": 22,
63
- "া": 24,
64
- "ি": 56,
65
- "ী": 59,
66
- "ু": 51,
67
- "ূ": 20,
68
- "ৃ": 16,
69
- "ে": 64,
70
- "ৈ": 69,
71
- "ো": 25,
72
- "ৌ": 5,
73
- "্": 42,
74
- "ৎ": 36,
75
- "ড়": 39,
76
- "ঢ়": 8,
77
- "য়": 21,
78
- "ৰ": 74,
79
- "—": 58,
80
- "’": 75,
81
- "‚": 4
82
  }
 
1
  {
2
+ "'": 37,
3
+ "/": 15,
4
  "[PAD]": 79,
5
  "[UNK]": 78,
6
+ "a": 56,
7
+ "e": 67,
8
+ "g": 3,
9
+ "l": 23,
10
+ "o": 69,
11
+ "p": 50,
12
+ "y": 46,
13
+ "|": 9,
14
  "।": 72,
15
+ "॥": 71,
16
+ "ঁ": 1,
17
+ "ং": 55,
18
+ "ঃ": 30,
19
+ "অ": 24,
20
+ "আ": 57,
21
+ "ই": 65,
22
  "ঈ": 45,
23
+ "উ": 52,
24
+ "ঊ": 32,
25
+ "ঋ": 34,
26
+ "এ": 0,
27
+ "ঐ": 68,
28
+ "ও": 36,
29
+ "ঔ": 44,
30
+ "ক": 38,
31
+ "খ": 16,
32
+ "গ": 19,
33
+ "ঘ": 27,
34
+ "ঙ": 60,
35
+ "চ": 8,
36
+ "ছ": 6,
37
+ "জ": 51,
38
+ "ঝ": 12,
39
+ "ঞ": 35,
40
+ "ট": 28,
41
+ "ঠ": 21,
42
+ "ড": 63,
43
+ "ঢ": 2,
44
+ "ণ": 5,
45
+ "ত": 74,
46
+ "থ": 48,
47
+ "দ": 42,
48
+ "ধ": 7,
49
+ "ন": 22,
50
+ "প": 49,
51
+ "ফ": 64,
52
+ "ব": 25,
53
+ "ভ": 75,
54
+ "ম": 39,
55
+ "য": 59,
56
+ "র": 40,
57
+ "ল": 13,
58
+ "শ": 70,
59
+ "ষ": 4,
60
+ "স": 26,
61
+ "হ": 66,
62
+ "়": 76,
63
+ "া": 41,
64
+ "ি": 43,
65
+ "ী": 18,
66
+ "ু": 77,
67
+ "ূ": 54,
68
+ "ৃ": 14,
69
+ "ে": 17,
70
+ "ৈ": 10,
71
+ "ো": 62,
72
+ "ৌ": 53,
73
+ "্": 47,
74
+ "ৎ": 73,
75
+ "ড়": 33,
76
+ "ঢ়": 11,
77
+ "য়": 61,
78
+ "ৰ": 20,
79
+ "—": 29,
80
+ "’": 58,
81
+ "‚": 31
82
  }