distinct
Browse files- tokenizer_13a.py +2 -0
tokenizer_13a.py
CHANGED
@@ -67,6 +67,7 @@ class TokenizerRegexp(BaseTokenizer):
|
|
67 |
# no leading or trailing spaces, single space within words
|
68 |
# return ' '.join(line.split())
|
69 |
# This line is changed with regards to the original tokenizer (seen above) to return individual words
|
|
|
70 |
return line.split()
|
71 |
|
72 |
|
@@ -96,6 +97,7 @@ class Tokenizer13a(BaseTokenizer):
|
|
96 |
line = line.replace("&amp;", "&")
|
97 |
line = line.replace("&lt;", "<")
|
98 |
line = line.replace("&gt;", ">")
|
|
|
99 |
|
100 |
return self._post_tokenizer(f" {line} ")
|
101 |
|
|
|
67 |
# no leading or trailing spaces, single space within words
|
68 |
# return ' '.join(line.split())
|
69 |
# This line is changed with regards to the original tokenizer (seen above) to return individual words
|
70 |
+
print(line)
|
71 |
return line.split()
|
72 |
|
73 |
|
|
|
97 |
line = line.replace("&amp;", "&")
|
98 |
line = line.replace("&lt;", "<")
|
99 |
line = line.replace("&gt;", ">")
|
100 |
+
print(line)
|
101 |
|
102 |
return self._post_tokenizer(f" {line} ")
|
103 |
|