m3hrdadfi committed on
Commit
9191419
1 Parent(s): d97cff0

fix preprocessing

Browse files
Files changed (1) hide show
  1. README.md +6 -6
README.md CHANGED
@@ -200,13 +200,13 @@ def normalizer(text, min_ratio=1.1):
200
  words = [word.replace("آ", "ا") if "آ" in word and not word.startswith("آ") else word for word in text.split()]
201
  text = " ".join(words)
202
 
203
- if not text or not len(text) > 2:
204
- return None
205
 
206
- en_text = re.sub(r"[^" + ENGLISH + "+]", " ", six.ensure_str(text))
207
- en_text = re.sub(r"\s+", " ", en_text)
208
- if len(en_text) > 1:
209
- return None
210
 
211
  return text
212
 
 
200
  words = [word.replace("آ", "ا") if "آ" in word and not word.startswith("آ") else word for word in text.split()]
201
  text = " ".join(words)
202
 
203
+ # if not text or not len(text) > 2:
204
+ # return None
205
 
206
+ # en_text = re.sub(r"[^" + ENGLISH + "+]", " ", six.ensure_str(text))
207
+ # en_text = re.sub(r"\s+", " ", en_text)
208
+ # if len(en_text) > 1:
209
+ # return None
210
 
211
  return text
212