altndrr committed on
Commit
f3261e5
1 Parent(s): 0dc766b

Remove unused code

Browse files
Files changed (1) hide show
  1. transforms_cased.py +2 -28
transforms_cased.py CHANGED
@@ -160,14 +160,12 @@ class FilterPOS(BaseTextTransform):
160
  Args:
161
  tags (list): List of POS tags to keep.
162
  engine (str): POS tagger to use. Must be one of "nltk" or "flair". Defaults to "nltk".
163
- keep_compound_nouns (bool): Whether to keep composed words. Defaults to True.
164
  """
165
 
166
- def __init__(self, tags: list, engine: str = "nltk", keep_compound_nouns: bool = True) -> None:
167
  super().__init__()
168
  self.tags = tags
169
  self.engine = engine
170
- self.keep_compound_nouns = keep_compound_nouns
171
 
172
  if engine == "nltk":
173
  nltk.download("averaged_perceptron_tagger", quiet=True)
@@ -189,30 +187,6 @@ class FilterPOS(BaseTextTransform):
189
  self.tagger(sentence)
190
  text = " ".join([token.text for token in sentence.tokens if token.tag in self.tags])
191
 
192
- if self.keep_compound_nouns:
193
- compound_nouns = []
194
-
195
- if self.engine == "nltk":
196
- for i in range(len(word_tags) - 1):
197
- if word_tags[i][1] == "NN" and word_tags[i + 1][1] == "NN":
198
- # if they are the same word, skip
199
- if word_tags[i][0] == word_tags[i + 1][0]:
200
- continue
201
-
202
- compound_noun = word_tags[i][0] + "_" + word_tags[i + 1][0]
203
- compound_nouns.append(compound_noun)
204
- elif self.engine == "flair":
205
- for i in range(len(sentence.tokens) - 1):
206
- if sentence.tokens[i].tag == "NN" and sentence.tokens[i + 1].tag == "NN":
207
- # if they are the same word, skip
208
- if sentence.tokens[i].text == sentence.tokens[i + 1].text:
209
- continue
210
-
211
- compound_noun = sentence.tokens[i].text + "_" + sentence.tokens[i + 1].text
212
- compound_nouns.append(compound_noun)
213
-
214
- text = " ".join([text, " ".join(compound_nouns)])
215
-
216
  return text
217
 
218
  def __repr__(self) -> str:
@@ -396,7 +370,7 @@ def default_vocabulary_transforms() -> TextCompose:
396
  transforms.append(ToSingular())
397
  transforms.append(DropWords(words=words_to_drop))
398
  transforms.append(FrequencyMinWordCount(min_count=2))
399
- transforms.append(FilterPOS(tags=pos_tags, engine="flair", keep_compound_nouns=False))
400
  transforms.append(RemoveDuplicates())
401
 
402
  transforms = TextCompose(transforms)
 
160
  Args:
161
  tags (list): List of POS tags to keep.
162
  engine (str): POS tagger to use. Must be one of "nltk" or "flair". Defaults to "nltk".
 
163
  """
164
 
165
+ def __init__(self, tags: list, engine: str = "nltk") -> None:
166
  super().__init__()
167
  self.tags = tags
168
  self.engine = engine
 
169
 
170
  if engine == "nltk":
171
  nltk.download("averaged_perceptron_tagger", quiet=True)
 
187
  self.tagger(sentence)
188
  text = " ".join([token.text for token in sentence.tokens if token.tag in self.tags])
189
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  return text
191
 
192
  def __repr__(self) -> str:
 
370
  transforms.append(ToSingular())
371
  transforms.append(DropWords(words=words_to_drop))
372
  transforms.append(FrequencyMinWordCount(min_count=2))
373
+ transforms.append(FilterPOS(tags=pos_tags, engine="flair"))
374
  transforms.append(RemoveDuplicates())
375
 
376
  transforms = TextCompose(transforms)