ValadisCERTH committed on
Commit
e28e68e
1 Parent(s): b6216d1

Update comparativesIdentification.py

Browse files
Files changed (1) hide show
  1. comparativesIdentification.py +16 -6
comparativesIdentification.py CHANGED
@@ -6,6 +6,8 @@ import numpy as np
6
 
7
  from sklearn.metrics.pairwise import cosine_similarity
8
 
 
 
9
 
10
  # use spacy small because in that way we are closer to a BOW model which is the one we care in our case since we just compare words
11
  nlp_comparatives = spacy.load('en_core_web_sm', disable=["parser", "ner"])
@@ -45,12 +47,15 @@ def find_comptives_straight_patterns(sentence):
45
 
46
  # find mentions of "equal" followed by "to"
47
  if token.text.lower() == "equal":
 
48
  next_token = token.nbor()
49
 
50
  if next_token.text.lower() == "to":
 
51
  prev_token = token.nbor(-1)
52
 
53
  if prev_token.pos_ == "NOUN":
 
54
  # comparatives.append({'comparative': ["equal to", "="]})
55
  comparatives.append({'comparative': "="})
56
 
@@ -60,18 +65,22 @@ def find_comptives_straight_patterns(sentence):
60
  next_token = token.nbor()
61
 
62
  if next_token.text.lower() == "than":
 
63
  prev_token = token.nbor(-1)
64
 
65
  if token.text.lower() == 'more':
 
66
  # comparatives.append({'comparative': [token.text + " " + next_token.text, '>']})
67
  comparatives.append({'comparative': '>'})
68
 
69
  elif token.text.lower() == 'less':
 
70
  # comparatives.append({'comparative': [token.text + " " + next_token.text, '<']})
71
  comparatives.append({'comparative': '<'})
72
 
73
  # find mentions of comparative adjectives or comparative adverbs followed by "than"
74
  elif token.tag_ == "JJR" or token.tag_ == "RBR":
 
75
  next_token = token.nbor()
76
 
77
  if next_token.text.lower() == "than" and next_token.nbor().pos_ != "NOUN":
@@ -79,9 +88,7 @@ def find_comptives_straight_patterns(sentence):
79
  # check if the token is a synonym of "bigger"
80
 
81
  # retrieve a set of synonyms for the concepts of 'big' and 'bigger'
82
- big_synonyms = set(
83
- wordnet.synsets('big') + wordnet.synsets('large') + wordnet.synsets('great') + wordnet.synsets(
84
- 'huge') + wordnet.synsets('enormous') + wordnet.synsets('heavy') + wordnet.synsets(
85
  'strong') + wordnet.synsets('enormous') + wordnet.synsets('massive') + wordnet.synsets(
86
  'immense') + wordnet.synsets('substantial'))
87
  bigger_synonyms = set(wordnet.synsets('bigger') + wordnet.synsets('larger') + wordnet.synsets(
@@ -89,17 +96,19 @@ def find_comptives_straight_patterns(sentence):
89
  'heavier') + wordnet.synsets('stronger'))
90
 
91
  bigger_related_words = big_synonyms.union(bigger_synonyms)
 
92
  bigger_rel_words = [word.name().split('.')[0] for word in bigger_related_words]
93
 
94
  flag_bigger = 0
95
 
96
  if token.text.lower() in bigger_rel_words:
 
97
  flag_bigger = 1
98
  # comparatives.append({'comparative': [token.text + " " + next_token.text, '>']})
99
  comparatives.append({'comparative': '>'})
100
 
101
  # if no synonym of bigger was found, check for smaller synsets
102
- if not flag_bigger:
103
 
104
  # retrieve a set of synonyms for the concepts of 'small' and 'smaller'
105
  small_synonyms = set(wordnet.synsets('small') + wordnet.synsets('little') + wordnet.synsets(
@@ -113,6 +122,7 @@ def find_comptives_straight_patterns(sentence):
113
  smaller_rel_words = [word.name().split('.')[0] for word in smaller_related_words]
114
 
115
  if token.text.lower() in smaller_rel_words:
 
116
  flag_bigger = 0
117
  # comparatives.append({'comparative': [token.text + " " + next_token.text, '<']})
118
  comparatives.append({'comparative': '<'})
@@ -668,7 +678,7 @@ def identify_comparatives(sentence):
668
  # Identify straightforward patterns
669
  straight_comptives = find_comptives_straight_patterns(sentence)
670
 
671
- # Identify advanced bigger/smaller comparatives
672
  bigger_smaller_comparatives = identify_bigger_smaller_advanced(sentence)
673
 
674
  # Identify advanced equal-to comparatives
@@ -733,7 +743,7 @@ def identify_comparatives(sentence):
733
 
734
 
735
  def comparatives_binding(sentence):
736
-
737
  try:
738
  comparative_symbols = find_comptives_symbols(sentence)
739
  comparative_mentions = identify_comparatives(sentence)
 
6
 
7
  from sklearn.metrics.pairwise import cosine_similarity
8
 
9
+ spacy.cli.download("en_core_web_sm")
10
+
11
 
12
  # use spacy small because in that way we are closer to a BOW model which is the one we care in our case since we just compare words
13
  nlp_comparatives = spacy.load('en_core_web_sm', disable=["parser", "ner"])
 
47
 
48
  # find mentions of "equal" followed by "to"
49
  if token.text.lower() == "equal":
50
+
51
  next_token = token.nbor()
52
 
53
  if next_token.text.lower() == "to":
54
+
55
  prev_token = token.nbor(-1)
56
 
57
  if prev_token.pos_ == "NOUN":
58
+
59
  # comparatives.append({'comparative': ["equal to", "="]})
60
  comparatives.append({'comparative': "="})
61
 
 
65
  next_token = token.nbor()
66
 
67
  if next_token.text.lower() == "than":
68
+
69
  prev_token = token.nbor(-1)
70
 
71
  if token.text.lower() == 'more':
72
+
73
  # comparatives.append({'comparative': [token.text + " " + next_token.text, '>']})
74
  comparatives.append({'comparative': '>'})
75
 
76
  elif token.text.lower() == 'less':
77
+
78
  # comparatives.append({'comparative': [token.text + " " + next_token.text, '<']})
79
  comparatives.append({'comparative': '<'})
80
 
81
  # find mentions of comparative adjectives or comparative adverbs followed by "than"
82
  elif token.tag_ == "JJR" or token.tag_ == "RBR":
83
+
84
  next_token = token.nbor()
85
 
86
  if next_token.text.lower() == "than" and next_token.nbor().pos_ != "NOUN":
 
88
  # check if the token is a synonym of "bigger"
89
 
90
  # retrieve a set of synonyms for the concepts of 'big' and 'bigger'
91
+ big_synonyms = set(wordnet.synsets('big') + wordnet.synsets('large') + wordnet.synsets('great') + wordnet.synsets('huge') + wordnet.synsets('enormous') + wordnet.synsets('heavy') + wordnet.synsets(
 
 
92
  'strong') + wordnet.synsets('enormous') + wordnet.synsets('massive') + wordnet.synsets(
93
  'immense') + wordnet.synsets('substantial'))
94
  bigger_synonyms = set(wordnet.synsets('bigger') + wordnet.synsets('larger') + wordnet.synsets(
 
96
  'heavier') + wordnet.synsets('stronger'))
97
 
98
  bigger_related_words = big_synonyms.union(bigger_synonyms)
99
+
100
  bigger_rel_words = [word.name().split('.')[0] for word in bigger_related_words]
101
 
102
  flag_bigger = 0
103
 
104
  if token.text.lower() in bigger_rel_words:
105
+
106
  flag_bigger = 1
107
  # comparatives.append({'comparative': [token.text + " " + next_token.text, '>']})
108
  comparatives.append({'comparative': '>'})
109
 
110
  # if no synonym of bigger was found, check for smaller synsets
111
+ if flag_bigger==0:
112
 
113
  # retrieve a set of synonyms for the concepts of 'small' and 'smaller'
114
  small_synonyms = set(wordnet.synsets('small') + wordnet.synsets('little') + wordnet.synsets(
 
122
  smaller_rel_words = [word.name().split('.')[0] for word in smaller_related_words]
123
 
124
  if token.text.lower() in smaller_rel_words:
125
+
126
  flag_bigger = 0
127
  # comparatives.append({'comparative': [token.text + " " + next_token.text, '<']})
128
  comparatives.append({'comparative': '<'})
 
678
  # Identify straightforward patterns
679
  straight_comptives = find_comptives_straight_patterns(sentence)
680
 
681
+ # Identify advanced bigger/smaller comparativesunknown_error
682
  bigger_smaller_comparatives = identify_bigger_smaller_advanced(sentence)
683
 
684
  # Identify advanced equal-to comparatives
 
743
 
744
 
745
  def comparatives_binding(sentence):
746
+ #
747
  try:
748
  comparative_symbols = find_comptives_symbols(sentence)
749
  comparative_mentions = identify_comparatives(sentence)