Spaces:
Paused
Paused
Commit
•
97adb95
1
Parent(s):
9d5168a
remove periods from the cleaned word
Browse files
- .gitignore +2 -1
- utils.py +4 -1
.gitignore
CHANGED
@@ -5,4 +5,5 @@
|
|
5 |
raw/*
|
6 |
results/*
|
7 |
logs/*
|
8 |
-
specificity-model/*
|
|
|
|
5 |
raw/*
|
6 |
results/*
|
7 |
logs/*
|
8 |
+
specificity-model/*
|
9 |
+
specificity-results/*
|
utils.py
CHANGED
@@ -11,7 +11,10 @@ def generate_embedding(model, sentence):
|
|
11 |
def clean_word(input_word):
|
12 |
cleaned_word = re.sub(r'\(.*?\)', '', input_word)
|
13 |
cleaned_word = cleaned_word.strip().lower().replace(", raw", "").replace(" raw", "").replace(", nfs", "").replace(" nfs", "")
|
14 |
-
|
|
|
|
|
|
|
15 |
cleaned_word = cleaned_word.strip()
|
16 |
return cleaned_word
|
17 |
|
|
|
11 |
def clean_word(input_word):
|
12 |
cleaned_word = re.sub(r'\(.*?\)', '', input_word)
|
13 |
cleaned_word = cleaned_word.strip().lower().replace(", raw", "").replace(" raw", "").replace(", nfs", "").replace(" nfs", "")
|
14 |
+
# Remove double or more empty spaces
|
15 |
+
cleaned_word = re.sub(r'\s+', ' ', cleaned_word)
|
16 |
+
# remove periods
|
17 |
+
cleaned_word = cleaned_word.replace(".", "")
|
18 |
cleaned_word = cleaned_word.strip()
|
19 |
return cleaned_word
|
20 |
|