Spaces:
Runtime error
Runtime error
ValadisCERTH
committed on
Commit
•
7109bc9
1
Parent(s):
9422b91
Update comparativesIdentification.py
Browse files
comparativesIdentification.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import spacy
|
2 |
import re
|
3 |
import nltk
|
@@ -8,7 +9,6 @@ from sklearn.metrics.pairwise import cosine_similarity
|
|
8 |
|
9 |
spacy.cli.download("en_core_web_sm")
|
10 |
|
11 |
-
|
12 |
# Use the small spaCy model: it keeps us closer to a bag-of-words (BOW) model, which is what we care about here since we only compare words
|
13 |
nlp_comparatives = spacy.load('en_core_web_sm', disable=["parser", "ner"])
|
14 |
|
@@ -369,15 +369,9 @@ def single_verb_comptives(sentence):
|
|
369 |
"""
|
370 |
|
371 |
# base references
|
372 |
-
bigger_references_sg = ["surpass", "exceed", "outstrip", "outdo", "
|
373 |
-
|
374 |
-
|
375 |
-
lesser_references_sg = ["lag", "trail", "lose", "underperform", "yield", "surrender", "straggle", "dawdle",
|
376 |
-
"lollygag", "loiter", "delay", "defer", "postpone", "procrastinate", "linger", "hesitate",
|
377 |
-
"prolong", "drag"]
|
378 |
-
equal_references_sg = ["match", "equal", "tie", "correspond", "conform", "agree", "harmonize", "coordinate",
|
379 |
-
"comply", "fit", "parallel", "resemble", "mirror", "emulate", "equilibrate", "balance",
|
380 |
-
"counterbalance", "offset", "compensate"]
|
381 |
|
382 |
doc = nlp_comparatives(sentence)
|
383 |
|
@@ -439,9 +433,9 @@ def single_verb_comptives(sentence):
|
|
439 |
# helper functions for 'identify_multi_word_verbs'
|
440 |
|
441 |
# Define multi-word verb lists
|
442 |
-
bigger_list = ["is a cut above", "is ahead of", "is superior to", "is greater than", "
|
443 |
-
smaller_list = ["fall behind", "is inferior to", "is smaller than", "lag behind", "trail behind", "
|
444 |
-
equal_list = ["is in line with", "is equal to", "is on a par with", "is
|
445 |
|
446 |
# Calculate embeddings of multi-word verbs
|
447 |
bigger_embeddings = [np.mean([token.vector for token in nlp_comparatives(verb)], axis=0) for verb in bigger_list]
|
@@ -717,6 +711,7 @@ def identify_comparatives(sentence):
|
|
717 |
break
|
718 |
|
719 |
unique_output = list(unique_comparatives.values())
|
|
|
720 |
clean_unique_output = []
|
721 |
|
722 |
# This snippet handles the extra compound cases such as "smaller than or equal to"
|
|
|
1 |
+
|
2 |
import spacy
|
3 |
import re
|
4 |
import nltk
|
|
|
9 |
|
10 |
spacy.cli.download("en_core_web_sm")
|
11 |
|
|
|
12 |
# Use the small spaCy model: it keeps us closer to a bag-of-words (BOW) model, which is what we care about here since we only compare words
|
13 |
nlp_comparatives = spacy.load('en_core_web_sm', disable=["parser", "ner"])
|
14 |
|
|
|
369 |
"""
|
370 |
|
371 |
# base references
|
372 |
+
bigger_references_sg = ["surpass", "exceed", "outstrip", "outdo", "outrank", "transcend"]
|
373 |
+
lesser_references_sg = ["subside", "depreciate", "curtail"]
|
374 |
+
equal_references_sg = ["match", "equal", "agree", "comply"]
|
|
|
|
|
|
|
|
|
|
|
|
|
375 |
|
376 |
doc = nlp_comparatives(sentence)
|
377 |
|
|
|
433 |
# helper functions for 'identify_multi_word_verbs'
|
434 |
|
435 |
# Define multi-word verb lists
|
436 |
+
bigger_list = ["is a cut above", "is ahead of", "is superior to", "is greater than", "is a class apart"]
|
437 |
+
smaller_list = ["fall behind", "is inferior to", "is smaller than", "lag behind", "trail behind", "fall short", "fall beneath"]
|
438 |
+
equal_list = ["is in line with", "is equal to", "is on a par with", "is the same as", "is comparable to", "is in sync with", "is in harmony with", "is in step with", "is in tune with", "is in accord with", "is consistent with", "is consonant with", "is equivalent to"]
|
439 |
|
440 |
# Calculate embeddings of multi-word verbs
|
441 |
bigger_embeddings = [np.mean([token.vector for token in nlp_comparatives(verb)], axis=0) for verb in bigger_list]
|
|
|
711 |
break
|
712 |
|
713 |
unique_output = list(unique_comparatives.values())
|
714 |
+
|
715 |
clean_unique_output = []
|
716 |
|
717 |
# This snippet handles the extra compound cases such as "smaller than or equal to"
|