aliasgerovs committed on
Commit
566b7f7
·
1 Parent(s): c6e6c67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -189,8 +189,10 @@ text_mc_tokenizer = AutoTokenizer.from_pretrained(text_mc_model_path)
189
  text_mc_model = AutoModelForSequenceClassification.from_pretrained(text_mc_model_path).to(device)
190
 
191
  def remove_special_characters(text):
192
- cleaned_text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
193
- return cleaned_text
 
 
194
 
195
  def update_character_count(text):
196
  return f"{len(text)} characters"
 
189
  text_mc_model = AutoModelForSequenceClassification.from_pretrained(text_mc_model_path).to(device)
190
 
191
  def remove_special_characters(text):
192
+ text = remove_accents(text)
193
+ pattern = r'[^\w\s\d.,!?\'"()-;]+'
194
+ text = re.sub(pattern, '', text)
195
+ return text
196
 
197
  def update_character_count(text):
198
  return f"{len(text)} characters"