ierhon
/

bayes-chatbot

Model card Files Files and versions Community

ierhon committed on Feb 20

Commit

c358215

•

1 Parent(s): 251ec78

Update tokenizer

Files changed (1) hide show

main.py +3 -1

main.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import math
 similar_letters = {"й": "и", "ё": "е", "e": "е", "t": "т", "i": "l", "o": "о", "k": "к", "3": "з", "a": "а", "x": "х", "c": "с", "m": "м"}
-letters = "qwertyuiopasdfghjklzxcvbnmёйцукенгшщзхъфывапролджэячсмитьбю"
 def countwords(x):
     temp = {}
@@ -41,6 +41,8 @@ class Chatbot:
                     preprocess += similar_letters[x]
                 else:
                     preprocess += x
         return preprocess.split()
     def train(self, data: dict):
         lendata = len(data)

 import math
 similar_letters = {"й": "и", "ё": "е", "e": "е", "t": "т", "i": "l", "o": "о", "k": "к", "3": "з", "a": "а", "x": "х", "c": "с", "m": "м"}
+letters = "qwertyuiopasdfghjklzxcvbnmёйцукенгшщзхъфывапролджэячсмитьбю "
 def countwords(x):
     temp = {}
                     preprocess += similar_letters[x]
                 else:
                     preprocess += x
+            else:
+                preprocess += " "+x+" "
         return preprocess.split()
     def train(self, data: dict):
         lendata = len(data)