add tokenizer
Browse files- .gitignore +1 -0
- vocab.json +1 -1
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
checkpoint-*/
|
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"श": 0, "र": 1, "ऋ": 2, "ऊ": 3, "इ": 4, "च": 5, "ी": 6, "ध": 7, "ड": 8, "ब": 9, "द": 10, "म": 11, "ह": 12, "ग": 13, "ा": 14, "ः": 15, "आ": 16, "": 17, "थ": 18, "झ": 19, "ृ": 20, "ौ": 21, "न": 22, "८": 23, "": 24, "ऐ": 25, "्": 26, "ँ": 27, "ॠ": 28, "ढ": 29, "ङ": 30, "े": 32, "प": 33, "ै": 34, "अ": 35, "उ": 36, "स": 37, "ो": 38, "ू": 39, "भ": 40, "क": 41, "ओ": 42, "ए": 43, "ु": 44, "त": 45, "य": 46, "फ": 47, "औ": 48, "ि": 49, "ं": 50, "छ": 51, "ख": 52, "
|
|
|
1 |
+
{"श": 0, "र": 1, "ऋ": 2, "ऊ": 3, "इ": 4, "च": 5, "ी": 6, "ध": 7, "ड": 8, "ब": 9, "द": 10, "म": 11, "ह": 12, "ग": 13, "ा": 14, "ः": 15, "आ": 16, "": 17, "थ": 18, "झ": 19, "ृ": 20, "ौ": 21, "न": 22, "८": 23, "": 24, "ऐ": 25, "्": 26, "ँ": 27, "ॠ": 28, "ढ": 29, "ङ": 30, "े": 32, "प": 33, "ै": 34, "अ": 35, "उ": 36, "स": 37, "ो": 38, "ू": 39, "भ": 40, "क": 41, "ओ": 42, "ए": 43, "ु": 44, "त": 45, "य": 46, "फ": 47, "औ": 48, "ि": 49, "ं": 50, "छ": 51, "ख": 52, "व": 53, "ज": 54, "ञ": 55, "घ": 56, "ट": 57, "ठ": 58, "ष": 59, "ल": 60, "ई": 61, "ण": 62, "|": 31, "[UNK]": 63, "[PAD]": 64}
|