altndrr committed on
Commit
aa4749c
1 Parent(s): 101e533

Remove unused transform

Browse files
Files changed (1) hide show
  1. transforms_cased.py +0 -38
transforms_cased.py CHANGED
@@ -17,7 +17,6 @@ __all__ = [
17
  "DropWords",
18
  "FilterPOS",
19
  "FrequencyMinWordCount",
20
- "FrequencyTopK",
21
  "ReplaceSeparators",
22
  "ToLowercase",
23
  "ToSingular",
@@ -257,43 +256,6 @@ class FrequencyMinWordCount(BaseTextTransform):
257
  return f"{self.__class__.__name__}(min_count={self.min_count})"
258
 
259
 
260
- class FrequencyTopK(BaseTextTransform):
261
- """Keep only the top k most frequent words in the input text.
262
-
263
- In case of a tie, all words with the same count as the last word are kept.
264
-
265
- Args:
266
- top_k (int): Number of top words to keep.
267
- """
268
-
269
- def __init__(self, top_k: int) -> None:
270
- super().__init__()
271
- self.top_k = top_k
272
-
273
- def __call__(self, text: str) -> str:
274
- """
275
- Args:
276
- text (str): Text to remove infrequent words from.
277
- """
278
- if self.top_k < 1:
279
- return text
280
-
281
- words = text.split()
282
- word_counts = {word: words.count(word) for word in words}
283
- top_words = sorted(word_counts, key=word_counts.get, reverse=True)
284
-
285
- # in case of a tie, keep all words with the same count
286
- top_words = top_words[: self.top_k]
287
- top_words = [word for word in top_words if word_counts[word] == word_counts[top_words[-1]]]
288
-
289
- text = " ".join([word for word in words if word in top_words])
290
-
291
- return text
292
-
293
- def __repr__(self) -> str:
294
- return f"{self.__class__.__name__}(top_k={self.top_k})"
295
-
296
-
297
  class ReplaceSeparators(BaseTextTransform):
298
  """Replace underscores and dashes with spaces."""
299
 
 
17
  "DropWords",
18
  "FilterPOS",
19
  "FrequencyMinWordCount",
 
20
  "ReplaceSeparators",
21
  "ToLowercase",
22
  "ToSingular",
 
256
  return f"{self.__class__.__name__}(min_count={self.min_count})"
257
 
258
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  class ReplaceSeparators(BaseTextTransform):
260
  """Replace underscores and dashes with spaces."""
261