Spaces:

ValadisCERTH
/

NaturalLanguageModule_complete

Runtime error

App Files Files Community

ValadisCERTH committed on May 10, 2023

Commit

7109bc9

1 Parent(s): 9422b91

Update comparativesIdentification.py

Browse files

Files changed (1) hide show

comparativesIdentification.py +8 -13

comparativesIdentification.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import spacy
 import re
 import nltk
@@ -8,7 +9,6 @@ from sklearn.metrics.pairwise import cosine_similarity
 spacy.cli.download("en_core_web_sm")
 # use the small spaCy model because it keeps us closer to a BOW model, which is what we care about here since we only compare words
 nlp_comparatives = spacy.load('en_core_web_sm', disable=["parser", "ner"])
@@ -369,15 +369,9 @@ def single_verb_comptives(sentence):
     """
     # base references
-    bigger_references_sg = ["surpass", "exceed", "outstrip", "outdo", "outmatch", "outclass", "eclipse", "overshadow",
-                            "outrank", "overtake", "top", "beat", "transcend", "dominate", "prevail", "trump",
-                            "vanquish", "outperform", "outgun", "outdistance", "outshine"]
-    lesser_references_sg = ["lag", "trail", "lose", "underperform", "yield", "surrender", "straggle", "dawdle",
-                            "lollygag", "loiter", "delay", "defer", "postpone", "procrastinate", "linger", "hesitate",
-                            "prolong", "drag"]
-    equal_references_sg = ["match", "equal", "tie", "correspond", "conform", "agree", "harmonize", "coordinate",
-                           "comply", "fit", "parallel", "resemble", "mirror", "emulate", "equilibrate", "balance",
-                           "counterbalance", "offset", "compensate"]
     doc = nlp_comparatives(sentence)
@@ -439,9 +433,9 @@ def single_verb_comptives(sentence):
 # helper functions for 'identify_multi_word_verbs'
 # Define multi-word verb lists
-bigger_list = ["is a cut above", "is ahead of", "is superior to", "is greater than", "raise the bar", "climb the ladder", "set the standard", "set the pace", "break the mold", "push the envelope", "raise the game", "is a class apart"]
-smaller_list = ["fall behind", "is inferior to", "is smaller than", "lag behind", "trail behind", "is second to", "bring up the rear", "lose ground", "bring up the tail end", "fall short", "fall beneath", "fail to measure up", "put off"]
-equal_list = ["is in line with", "is equal to", "is on a par with", "is on par with", "is the same as", "is comparable to", "is in sync with", "is in harmony with", "is in step with", "is in tune with", "is in accord with", "is consistent with", "is consonant with", "keep pace with", "keep up with", "is equivalent to", "balance out", "even out"]
 # Calculate embeddings of multi-word verbs
 bigger_embeddings = [np.mean([token.vector for token in nlp_comparatives(verb)], axis=0) for verb in bigger_list]
@@ -717,6 +711,7 @@ def identify_comparatives(sentence):
                     break
     unique_output = list(unique_comparatives.values())
     clean_unique_output = []
     # this snippet is to handle the extra cases of smaller than or equal to etc

 import spacy
 import re
 import nltk
 spacy.cli.download("en_core_web_sm")
 # use the small spaCy model because it keeps us closer to a BOW model, which is what we care about here since we only compare words
 nlp_comparatives = spacy.load('en_core_web_sm', disable=["parser", "ner"])
     """
     # base references
+    bigger_references_sg = ["surpass", "exceed", "outstrip", "outdo", "outrank", "transcend"]
+    lesser_references_sg = ["subside", "depreciate", "curtail"]
+    equal_references_sg = ["match", "equal", "agree", "comply"]
     doc = nlp_comparatives(sentence)
 # helper functions for 'identify_multi_word_verbs'
 # Define multi-word verb lists
+bigger_list = ["is a cut above", "is ahead of", "is superior to", "is greater than", "is a class apart"]
+smaller_list = ["fall behind", "is inferior to", "is smaller than", "lag behind", "trail behind", "fall short", "fall beneath"]
+equal_list = ["is in line with", "is equal to", "is on a par with", "is the same as", "is comparable to", "is in sync with", "is in harmony with", "is in step with", "is in tune with", "is in accord with", "is consistent with", "is consonant with", "is equivalent to"]
 # Calculate embeddings of multi-word verbs
 bigger_embeddings = [np.mean([token.vector for token in nlp_comparatives(verb)], axis=0) for verb in bigger_list]
                     break
     unique_output = list(unique_comparatives.values())
     clean_unique_output = []
     # this snippet is to handle the extra cases of smaller than or equal to etc