fix preprocessing
Browse files
README.md
CHANGED
@@ -200,13 +200,13 @@ def normalizer(text, min_ratio=1.1):
|
|
200 |
words = [word.replace("آ", "ا") if "آ" in word and not word.startswith("آ") else word for word in text.split()]
|
201 |
text = " ".join(words)
|
202 |
|
203 |
-
|
204 |
-
|
205 |
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
|
211 |
return text
|
212 |
|
|
|
200 |
words = [word.replace("آ", "ا") if "آ" in word and not word.startswith("آ") else word for word in text.split()]
|
201 |
text = " ".join(words)
|
202 |
|
203 |
+
# if not text or not len(text) > 2:
|
204 |
+
# return None
|
205 |
|
206 |
+
# en_text = re.sub(r"[^" + ENGLISH + "+]", " ", six.ensure_str(text))
|
207 |
+
# en_text = re.sub(r"\s+", " ", en_text)
|
208 |
+
# if len(en_text) > 1:
|
209 |
+
# return None
|
210 |
|
211 |
return text
|
212 |
|