lvwerra HF staff committed on
Commit
f5b1b3f
1 Parent(s): 2ce3448

Update Space (evaluate main: c447fc8e)

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -1
  2. word_count.py +3 -18
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
- git+https://github.com/huggingface/evaluate.git@e4a2724377909fe2aeb4357e3971e5a569673b39
2
  sklearn~=0.0
 
1
+ git+https://github.com/huggingface/evaluate.git@c447fc8eda9c62af501bfdc6988919571050d950
2
  sklearn~=0.0
word_count.py CHANGED
@@ -12,9 +12,6 @@
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
 
15
- from dataclasses import dataclass
16
- from typing import Optional
17
-
18
  import datasets
19
  from sklearn.feature_extraction.text import CountVectorizer
20
 
@@ -44,30 +41,18 @@ Examples:
44
  _CITATION = ""
45
 
46
 
47
- @dataclass
48
- class WordCount(evaluate.info.Config):
49
-
50
- name: str = "default"
51
-
52
- max_vocab: Optional[int] = None
53
-
54
-
55
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
56
  class WordCount(evaluate.Measurement):
57
  """This measurement returns the total number of words and the number of unique words
58
  in the input string(s)."""
59
 
60
- CONFIG_CLASS = WordCount
61
- ALLOWED_CONFIG_NAMES = ["default"]
62
-
63
- def _info(self, config):
64
  return evaluate.MeasurementInfo(
65
  # This is the description that will appear on the modules page.
66
  module_type="measurement",
67
  description=_DESCRIPTION,
68
  citation=_CITATION,
69
  inputs_description=_KWARGS_DESCRIPTION,
70
- config=config,
71
  features=datasets.Features(
72
  {
73
  "data": datasets.Value("string"),
@@ -75,9 +60,9 @@ class WordCount(evaluate.Measurement):
75
  ),
76
  )
77
 
78
- def _compute(self, data):
79
  """Returns the number of unique words in the input data"""
80
- count_vectorizer = CountVectorizer(max_features=self.config.max_vocab)
81
  document_matrix = count_vectorizer.fit_transform(data)
82
  word_count = document_matrix.sum()
83
  unique_words = document_matrix.shape[1]
 
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
 
 
 
 
15
  import datasets
16
  from sklearn.feature_extraction.text import CountVectorizer
17
 
 
41
  _CITATION = ""
42
 
43
 
 
 
 
 
 
 
 
 
44
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
45
  class WordCount(evaluate.Measurement):
46
  """This measurement returns the total number of words and the number of unique words
47
  in the input string(s)."""
48
 
49
+ def _info(self):
 
 
 
50
  return evaluate.MeasurementInfo(
51
  # This is the description that will appear on the modules page.
52
  module_type="measurement",
53
  description=_DESCRIPTION,
54
  citation=_CITATION,
55
  inputs_description=_KWARGS_DESCRIPTION,
 
56
  features=datasets.Features(
57
  {
58
  "data": datasets.Value("string"),
 
60
  ),
61
  )
62
 
63
+ def _compute(self, data, max_vocab=None):
64
  """Returns the number of unique words in the input data"""
65
+ count_vectorizer = CountVectorizer(max_features=max_vocab)
66
  document_matrix = count_vectorizer.fit_transform(data)
67
  word_count = document_matrix.sum()
68
  unique_words = document_matrix.shape[1]