truong-xuan-linh committed on
Commit
45bafcb
1 Parent(s): 79309e0

update remove_special_characters

Browse files
Files changed (1) hide show
  1. src/model.py +2 -2
src/model.py CHANGED
@@ -29,7 +29,7 @@ vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
29
 
30
  def remove_special_characters(sentence):
31
  # Use regular expression to keep only letters, periods, and commas
32
- sentence_after_removal = re.sub(r'[^a-zA-Z\s,.\u00C0-\u1EF9]', ' ', sentence)
33
  return sentence_after_removal
34
 
35
  from scipy.signal import butter, lfilter
@@ -106,13 +106,13 @@ class Model():
106
  full_speech = []
107
  separators = r";|\.|!|\?|\n"
108
  text = uroman_normalization(text)
 
109
  text = text.replace(" ", "▁")
110
  split_texts = re.split(separators, text)
111
 
112
  for split_text in split_texts:
113
 
114
  if split_text != "▁":
115
- # split_text = remove_special_characters(" ," + split_text) + " ,"
116
  split_text = split_text.lower() + "▁"
117
  print(split_text)
118
  inputs = self.processor.tokenizer(text=split_text, return_tensors="pt")
 
29
 
30
  def remove_special_characters(sentence):
31
  # Use regular expression to keep only letters, periods, and commas
32
+ sentence_after_removal = re.sub(r'[^a-zA-Z\s,.\u00C0-\u1EF9]', ' ,', sentence)
33
  return sentence_after_removal
34
 
35
  from scipy.signal import butter, lfilter
 
106
  full_speech = []
107
  separators = r";|\.|!|\?|\n"
108
  text = uroman_normalization(text)
109
+ text = remove_special_characters(text)
110
  text = text.replace(" ", "▁")
111
  split_texts = re.split(separators, text)
112
 
113
  for split_text in split_texts:
114
 
115
  if split_text != "▁":
 
116
  split_text = split_text.lower() + "▁"
117
  print(split_text)
118
  inputs = self.processor.tokenizer(text=split_text, return_tensors="pt")