m3hrdadfi committed on
Commit
63bfb6b
•
1 Parent(s): 52e3b9f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -4
README.md CHANGED
@@ -68,7 +68,7 @@ chars_to_ignore = [
68
  "β€œ", "%", "β€˜", "οΏ½", "–", "…", "_", "”", 'β€œ', 'β€ž'
69
  ]
70
  chars_to_mapping = {
71
- "\\\\u200c": " ", "\\\\u200d": " ", "\\\\u200e": " ", "\\\\u200f": " ", "\\\\ufeff": " ",
72
  }
73
 
74
  def multiple_replace(text, chars_to_mapping):
@@ -83,7 +83,7 @@ def normalizer(batch, chars_to_ignore, chars_to_mapping):
83
  chars_to_ignore_regex = f"""[{"".join(chars_to_ignore)}]"""
84
  text = batch["sentence"].lower().strip()
85
 
86
- text = text.replace("\\\\u0307", " ").strip()
87
  text = multiple_replace(text, chars_to_mapping)
88
  text = remove_special_characters(text, chars_to_ignore_regex)
89
 
@@ -194,7 +194,7 @@ chars_to_ignore = [
194
  "β€œ", "%", "β€˜", "οΏ½", "–", "…", "_", "”", 'β€œ', 'β€ž'
195
  ]
196
  chars_to_mapping = {
197
- "\\\\u200c": " ", "\\\\u200d": " ", "\\\\u200e": " ", "\\\\u200f": " ", "\\\\ufeff": " ",
198
  }
199
 
200
  def multiple_replace(text, chars_to_mapping):
@@ -209,7 +209,7 @@ def normalizer(batch, chars_to_ignore, chars_to_mapping):
209
  chars_to_ignore_regex = f"""[{"".join(chars_to_ignore)}]"""
210
  text = batch["sentence"].lower().strip()
211
 
212
- text = text.replace("\\\\u0307", " ").strip()
213
  text = multiple_replace(text, chars_to_mapping)
214
  text = remove_special_characters(text, chars_to_ignore_regex)
215
 
 
68
  "β€œ", "%", "β€˜", "οΏ½", "–", "…", "_", "”", 'β€œ', 'β€ž'
69
  ]
70
  chars_to_mapping = {
71
+ "\u200c": " ", "\u200d": " ", "\u200e": " ", "\u200f": " ", "\ufeff": " ",
72
  }
73
 
74
  def multiple_replace(text, chars_to_mapping):
 
83
  chars_to_ignore_regex = f"""[{"".join(chars_to_ignore)}]"""
84
  text = batch["sentence"].lower().strip()
85
 
86
+ text = text.replace("\\\\\\\\u0307", " ").strip()
87
  text = multiple_replace(text, chars_to_mapping)
88
  text = remove_special_characters(text, chars_to_ignore_regex)
89
 
 
194
  "β€œ", "%", "β€˜", "οΏ½", "–", "…", "_", "”", 'β€œ', 'β€ž'
195
  ]
196
  chars_to_mapping = {
197
+ "\u200c": " ", "\u200d": " ", "\u200e": " ", "\u200f": " ", "\ufeff": " ",
198
  }
199
 
200
  def multiple_replace(text, chars_to_mapping):
 
209
  chars_to_ignore_regex = f"""[{"".join(chars_to_ignore)}]"""
210
  text = batch["sentence"].lower().strip()
211
 
212
+ text = text.replace("\\\\\\\\u0307", " ").strip()
213
  text = multiple_replace(text, chars_to_mapping)
214
  text = remove_special_characters(text, chars_to_ignore_regex)
215