Trifon commited on
Commit
49af5e2
1 Parent(s): 5cb5da2

add tokenizer

Browse files
runs/Aug20_18-18-44_94a3aea4ffd6/events.out.tfevents.1661019752.94a3aea4ffd6.91.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27344bf2c09b175f07f21a77f5713e914908285e6b1c18dec02f684f5093a95a
3
- size 5208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22f2173e547168e762f8094d4f092656a0340d6d9a99e0b99caba9ed3dff5284
3
+ size 6468
vocab.json CHANGED
@@ -1 +1 @@
1
- {"č": 0, "Ɗ": 1, "á": 2, "Č": 3, "ŀ": 4, "ĸ": 5, "Ľ": 6, "Ø": 7, "Ý": 8, "ï": 9, "Ǝ": 10, "Ğ": 11, "ĩ": 12, "ß": 13, "Ň": 14, "ħ": 15, "Ă": 16, "ë": 17, "æ": 18, "Ō": 19, "Ę": 20, "ö": 21, "ă": 22, "ĭ": 23, "ŗ": 24, "Ď": 25, "Ű": 26, "ġ": 27, "í": 28, "ő": 29, "Û": 30, "ļ": 31, "Ł": 32, "ŵ": 33, "ŏ": 34, "ź": 35, "ƈ": 36, "ŝ": 37, "ä": 38, "ð": 39, "Ñ": 40, "Ƅ": 41, "ć": 42, "î": 43, "Ü": 44, "Ŕ": 45, "ĵ": 46, "ƍ": 47, "Ż": 48, "Ũ": 49, "ŷ": 50, "ċ": 51, "Ħ": 52, "ƅ": 53, "ű": 54, "â": 55, "ĥ": 56, "Ņ": 57, "Ī": 58, "š": 59, "Ą": 60, "Ŵ": 61, "Œ": 62, "Ĺ": 63, "ō": 64, "ē": 65, "Ĝ": 66, "Ö": 67, "đ": 68, "ñ": 69, "ţ": 70, "ř": 71, "Ŋ": 72, "Ï": 73, "ĕ": 74, "Í": 75, "Ş": 76, "İ": 77, "ŭ": 78, "ď": 79, "ā": 80, "ü": 81, "Ċ": 82, "é": 83, "Ɖ": 84, "Ɓ": 85, "ń": 86, "ŋ": 87, "Ŗ": 88, "ų": 89, "Ë": 90, "ç": 91, "Ɔ": 92, "Ƃ": 93, "ó": 94, "Ŷ": 95, "Ì": 96, "ij": 97, "ľ": 98, "ņ": 99, "ė": 100, "ž": 101, "ſ": 102, "Ţ": 103, "Ù": 104, "ý": 105, "É": 106, "Ó": 107, "ø": 108, "Ē": 109, "Ð": 110, "ģ": 111, "Ŝ": 112, "û": 113, "œ": 114, "ʼn": 115, "Ć": 116, "Ê": 117, "Ń": 118, "Ū": 119, "ÿ": 120, "Ų": 121, "ı": 122, "ś": 123, "ô": 124, "ì": 125, "Ĉ": 126, "Ģ": 127, "ě": 128, "ã": 129, "è": 130, "IJ": 131, "ş": 132, "ŧ": 133, "Î": 134, "ą": 135, "ī": 136, "Ĕ": 137, "ú": 138, "Ÿ": 139, "ż": 140, "ƌ": 141, "ū": 142, "Ő": 143, "Ə": 144, "ĺ": 145, "ƀ": 146, "ů": 147, "Ŀ": 148, "Ĥ": 149, "Į": 150, "Ĩ": 151, "ę": 152, "į": 153, "Ř": 154, "ù": 155, "ŕ": 156, "Ŏ": 157, "Ú": 158, "å": 159, "Đ": 160, "ĝ": 161, "Ś": 162, "Ĭ": 163, "Ķ": 164, "ť": 165, "ê": 166, "Ô": 167, "÷": 168, "ũ": 169, "ò": 170, "Ò": 171, "Ť": 172, "þ": 173, "Ů": 174, "È": 175, "Ŭ": 176, "ķ": 177, "Ƌ": 178, "ĉ": 179, "Ļ": 180, "Ž": 181, "ğ": 182, "à": 183, "Ƈ": 184, "Ġ": 185, "õ": 186, "Þ": 187, "Š": 188, "×": 189, "Ě": 190, "Ā": 191, "ł": 192, "ƃ": 193, "Ĵ": 194, "Ė": 195, "ň": 196, "Ŧ": 197, "Ź": 198, "Õ": 199, "[UNK]": 200, "[PAD]": 201}
 
1
+ {"Ĭ": 0, "Ň": 1, "è": 2, "ĩ": 3, "ʼn": 4, "ē": 5, "Ġ": 6, "Ř": 7, "Õ": 8, "Ċ": 9, "ŷ": 10, "ŏ": 11, "É": 12, "Ł": 13, "Ĵ": 14, "Î": 15, "ů": 16, "ė": 17, "ſ": 18, "Ŷ": 19, "ĝ": 20, "Ŗ": 21, "Ɔ": 22, "ĸ": 23, "ű": 24, "ƅ": 25, "Ď": 26, "ć": 27, "ĉ": 28, "Ą": 29, "Í": 30, "Ĕ": 31, "Đ": 32, "Ŏ": 33, "ę": 34, "ĥ": 35, "œ": 36, "ŀ": 37, "ĺ": 38, "ă": 39, "Ö": 40, "Č": 41, "Ķ": 42, "à": 43, "ñ": 44, "Ć": 45, "Þ": 46, "Ŀ": 47, "ş": 48, "č": 49, "Ō": 50, "ċ": 51, "Ě": 52, "ê": 53, "Ź": 54, "Ė": 55, "IJ": 56, "ò": 57, "í": 58, "ń": 59, "Ƌ": 60, "ň": 61, "ö": 62, "ĭ": 63, "ņ": 64, "ž": 65, "ě": 66, "ħ": 67, "ı": 68, "ù": 69, "Û": 70, "Ū": 71, "ô": 72, "ã": 73, "Ŋ": 74, "š": 75, "Ī": 76, "Ē": 77, "Ù": 78, "ï": 79, "î": 80, "ú": 81, "ũ": 82, "ķ": 83, "ƀ": 84, "ŋ": 85, "Ļ": 86, "Ś": 87, "â": 88, "æ": 89, "Ü": 90, "ĵ": 91, "Ń": 92, "Ì": 93, "Ă": 94, "ź": 95, "ū": 96, "Œ": 97, "ä": 98, "×": 99, "Ñ": 100, "İ": 101, "Ž": 102, "Ŝ": 103, "é": 104, "Ā": 105, "ß": 106, "ŵ": 107, "ō": 108, "ó": 109, "Ɖ": 110, "ë": 111, "ij": 112, "Ĝ": 113, "Ŧ": 114, "ĕ": 115, "ƍ": 116, "ƌ": 117, "Ģ": 118, "ƃ": 119, "Ų": 120, "ģ": 121, "ā": 122, "Ű": 123, "Ş": 124, "Ŕ": 125, "Ľ": 126, "ľ": 127, "Ţ": 128, "Ŭ": 129, "Ż": 130, "ď": 131, "ŧ": 132, "ţ": 133, "Ə": 134, "Ů": 135, "ø": 136, "ğ": 137, "Ğ": 138, "õ": 139, "ų": 140, "Ĉ": 141, "Ɓ": 142, "ç": 143, "Ó": 144, "÷": 145, "û": 146, "Ĥ": 147, "Š": 148, "ġ": 149, "ƈ": 150, "Ę": 151, "Ņ": 152, "Ƈ": 153, "ť": 154, "ŕ": 155, "Ĺ": 156, "ļ": 157, "È": 158, "á": 159, "Ø": 160, "đ": 161, "Ť": 162, "ż": 163, "Ũ": 164, "ÿ": 165, "ś": 166, "ŗ": 167, "ő": 168, "ð": 169, "Ŵ": 170, "Ǝ": 171, "Ÿ": 172, "Ɗ": 173, "ŝ": 174, "ī": 175, "ì": 176, "ŭ": 177, "Ò": 178, "Ú": 179, "Ő": 180, "Ƅ": 181, "ý": 182, "Ð": 183, "þ": 184, "Ë": 185, "Ï": 186, "ą": 187, "į": 188, "Ƃ": 189, "å": 190, "Į": 191, "Ħ": 192, "ł": 193, "Ý": 194, "Ĩ": 195, "Ô": 196, "ř": 197, "ü": 198, "Ê": 199, "[UNK]": 200, "[PAD]": 201}