add tokenizer
Browse files
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
checkpoint-*/
|
added_tokens.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"<s>":
|
|
|
1 |
+
{"<s>": 69, "</s>": 70}
|
runs/Jan02_06-38-08_1f4e64a063c7/1641105665.0038888/events.out.tfevents.1641105665.1f4e64a063c7.75.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7668e49cd5d277e8d4c4f66be11d5dcc08a4cfb92f81149b80a883f76f68eef
|
3 |
+
size 4772
|
runs/Jan02_06-38-08_1f4e64a063c7/events.out.tfevents.1641105664.1f4e64a063c7.75.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:704dcbc85285b62d934b51f719b4ca1d654c3bb8c238e6422bd2df9b5e3bbfaf
|
3 |
+
size 4738
|
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"
|
|
|
1 |
+
{"ก": 1, "ข": 2, "ค": 3, "ฆ": 4, "ง": 5, "จ": 6, "ฉ": 7, "ช": 8, "ซ": 9, "ญ": 10, "ฎ": 11, "ฏ": 12, "ฐ": 13, "ฑ": 14, "ฒ": 15, "ณ": 16, "ด": 17, "ต": 18, "ถ": 19, "ท": 20, "ธ": 21, "น": 22, "บ": 23, "ป": 24, "ผ": 25, "ฝ": 26, "พ": 27, "ฟ": 28, "ภ": 29, "ม": 30, "ย": 31, "ร": 32, "ฤ": 33, "ล": 34, "ว": 35, "ศ": 36, "ษ": 37, "ส": 38, "ห": 39, "ฬ": 40, "อ": 41, "ฮ": 42, "ะ": 43, "ั": 44, "า": 45, "ำ": 46, "ิ": 47, "ี": 48, "ึ": 49, "ื": 50, "ุ": 51, "ู": 52, "เ": 53, "แ": 54, "โ": 55, "ใ": 56, "ไ": 57, "ๅ": 58, "ๆ": 59, "็": 60, "่": 61, "้": 62, "๊": 63, "๋": 64, "์": 65, "ํ": 66, "|": 0, "[UNK]": 67, "[PAD]": 68}
|