Update tokenizer
Browse files
main.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import math
|
2 |
|
3 |
similar_letters = {"й": "и", "ё": "е", "e": "е", "t": "т", "i": "l", "o": "о", "k": "к", "3": "з", "a": "а", "x": "х", "c": "с", "m": "м"}
|
4 |
-
letters = "qwertyuiopasdfghjklzxcvbnmёйцукенгшщзхъфывапролджэячсмитьбю"
|
5 |
|
6 |
def countwords(x):
|
7 |
temp = {}
|
@@ -41,6 +41,8 @@ class Chatbot:
|
|
41 |
preprocess += similar_letters[x]
|
42 |
else:
|
43 |
preprocess += x
|
|
|
|
|
44 |
return preprocess.split()
|
45 |
def train(self, data: dict):
|
46 |
lendata = len(data)
|
|
|
1 |
import math
|
2 |
|
3 |
similar_letters = {"й": "и", "ё": "е", "e": "е", "t": "т", "i": "l", "o": "о", "k": "к", "3": "з", "a": "а", "x": "х", "c": "с", "m": "м"}
|
4 |
+
letters = "qwertyuiopasdfghjklzxcvbnmёйцукенгшщзхъфывапролджэячсмитьбю "
|
5 |
|
6 |
def countwords(x):
|
7 |
temp = {}
|
|
|
41 |
preprocess += similar_letters[x]
|
42 |
else:
|
43 |
preprocess += x
|
44 |
+
else:
|
45 |
+
preprocess += " "+x+" "
|
46 |
return preprocess.split()
|
47 |
def train(self, data: dict):
|
48 |
lendata = len(data)
|