beweinreich committed on
Commit
97adb95
1 Parent(s): 9d5168a

remove periods from the cleaned word

Browse files
Files changed (2) hide show
  1. .gitignore +2 -1
  2. utils.py +4 -1
.gitignore CHANGED
@@ -5,4 +5,5 @@
5
  raw/*
6
  results/*
7
  logs/*
8
- specificity-model/*
 
 
5
  raw/*
6
  results/*
7
  logs/*
8
+ specificity-model/*
9
+ specificity-results/*
utils.py CHANGED
@@ -11,7 +11,10 @@ def generate_embedding(model, sentence):
11
  def clean_word(input_word):
12
  cleaned_word = re.sub(r'\(.*?\)', '', input_word)
13
  cleaned_word = cleaned_word.strip().lower().replace(", raw", "").replace(" raw", "").replace(", nfs", "").replace(" nfs", "")
14
- cleaned_word = re.sub(r'\s+', ' ', cleaned_word) # Remove double or more empty spaces
 
 
 
15
  cleaned_word = cleaned_word.strip()
16
  return cleaned_word
17
 
 
11
  def clean_word(input_word):
12
  cleaned_word = re.sub(r'\(.*?\)', '', input_word)
13
  cleaned_word = cleaned_word.strip().lower().replace(", raw", "").replace(" raw", "").replace(", nfs", "").replace(" nfs", "")
14
+ # Remove double or more empty spaces
15
+ cleaned_word = re.sub(r'\s+', ' ', cleaned_word)
16
+ # remove periods
17
+ cleaned_word = cleaned_word.replace(".", "")
18
  cleaned_word = cleaned_word.strip()
19
  return cleaned_word
20