ValadisCERTH committed on
Commit
7109bc9
1 Parent(s): 9422b91

Update comparativesIdentification.py

Browse files
Files changed (1) hide show
  1. comparativesIdentification.py +8 -13
comparativesIdentification.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import spacy
2
  import re
3
  import nltk
@@ -8,7 +9,6 @@ from sklearn.metrics.pairwise import cosine_similarity
8
 
9
  spacy.cli.download("en_core_web_sm")
10
 
11
-
12
  # use the small spaCy model because it keeps us closer to a BOW model, which is what we care about here since we just compare words
13
  nlp_comparatives = spacy.load('en_core_web_sm', disable=["parser", "ner"])
14
 
@@ -369,15 +369,9 @@ def single_verb_comptives(sentence):
369
  """
370
 
371
  # base references
372
- bigger_references_sg = ["surpass", "exceed", "outstrip", "outdo", "outmatch", "outclass", "eclipse", "overshadow",
373
- "outrank", "overtake", "top", "beat", "transcend", "dominate", "prevail", "trump",
374
- "vanquish", "outperform", "outgun", "outdistance", "outshine"]
375
- lesser_references_sg = ["lag", "trail", "lose", "underperform", "yield", "surrender", "straggle", "dawdle",
376
- "lollygag", "loiter", "delay", "defer", "postpone", "procrastinate", "linger", "hesitate",
377
- "prolong", "drag"]
378
- equal_references_sg = ["match", "equal", "tie", "correspond", "conform", "agree", "harmonize", "coordinate",
379
- "comply", "fit", "parallel", "resemble", "mirror", "emulate", "equilibrate", "balance",
380
- "counterbalance", "offset", "compensate"]
381
 
382
  doc = nlp_comparatives(sentence)
383
 
@@ -439,9 +433,9 @@ def single_verb_comptives(sentence):
439
  # helper functions for 'identify_multi_word_verbs'
440
 
441
  # Define multi-word verb lists
442
- bigger_list = ["is a cut above", "is ahead of", "is superior to", "is greater than", "raise the bar", "climb the ladder", "set the standard", "set the pace", "break the mold", "push the envelope", "raise the game", "is a class apart"]
443
- smaller_list = ["fall behind", "is inferior to", "is smaller than", "lag behind", "trail behind", "is second to", "bring up the rear", "lose ground", "bring up the tail end", "fall short", "fall beneath", "fail to measure up", "put off"]
444
- equal_list = ["is in line with", "is equal to", "is on a par with", "is on par with", "is the same as", "is comparable to", "is in sync with", "is in harmony with", "is in step with", "is in tune with", "is in accord with", "is consistent with", "is consonant with", "keep pace with", "keep up with", "is equivalent to", "balance out", "even out"]
445
 
446
  # Calculate embeddings of multi-word verbs
447
  bigger_embeddings = [np.mean([token.vector for token in nlp_comparatives(verb)], axis=0) for verb in bigger_list]
@@ -717,6 +711,7 @@ def identify_comparatives(sentence):
717
  break
718
 
719
  unique_output = list(unique_comparatives.values())
 
720
  clean_unique_output = []
721
 
722
  # this snippet is to handle the extra cases of smaller than or equal to etc
 
1
+
2
  import spacy
3
  import re
4
  import nltk
 
9
 
10
  spacy.cli.download("en_core_web_sm")
11
 
 
12
  # use the small spaCy model because it keeps us closer to a BOW model, which is what we care about here since we just compare words
13
  nlp_comparatives = spacy.load('en_core_web_sm', disable=["parser", "ner"])
14
 
 
369
  """
370
 
371
  # base references
372
+ bigger_references_sg = ["surpass", "exceed", "outstrip", "outdo", "outrank", "transcend"]
373
+ lesser_references_sg = ["subside", "depreciate", "curtail"]
374
+ equal_references_sg = ["match", "equal", "agree", "comply"]
 
 
 
 
 
 
375
 
376
  doc = nlp_comparatives(sentence)
377
 
 
433
  # helper functions for 'identify_multi_word_verbs'
434
 
435
  # Define multi-word verb lists
436
+ bigger_list = ["is a cut above", "is ahead of", "is superior to", "is greater than", "is a class apart"]
437
+ smaller_list = ["fall behind", "is inferior to", "is smaller than", "lag behind", "trail behind", "fall short", "fall beneath"]
438
+ equal_list = ["is in line with", "is equal to", "is on a par with", "is the same as", "is comparable to", "is in sync with", "is in harmony with", "is in step with", "is in tune with", "is in accord with", "is consistent with", "is consonant with", "is equivalent to"]
439
 
440
  # Calculate embeddings of multi-word verbs
441
  bigger_embeddings = [np.mean([token.vector for token in nlp_comparatives(verb)], axis=0) for verb in bigger_list]
 
711
  break
712
 
713
  unique_output = list(unique_comparatives.values())
714
+
715
  clean_unique_output = []
716
 
717
  # this snippet is to handle the extra cases of smaller than or equal to etc