lvwerra HF staff committed on
Commit
2ce3448
·
1 Parent(s): 7a9f6c9

Update Space (evaluate main: e4a27243)

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -1
  2. word_count.py +18 -3
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
- git+https://github.com/huggingface/evaluate.git@80448674f5447a9682afe051db243c4a13bfe4ff
2
  sklearn~=0.0
 
1
+ git+https://github.com/huggingface/evaluate.git@e4a2724377909fe2aeb4357e3971e5a569673b39
2
  sklearn~=0.0
word_count.py CHANGED
@@ -12,6 +12,9 @@
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
 
 
 
 
15
  import datasets
16
  from sklearn.feature_extraction.text import CountVectorizer
17
 
@@ -41,18 +44,30 @@ Examples:
41
  _CITATION = ""
42
 
43
 
 
 
 
 
 
 
 
 
44
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
45
  class WordCount(evaluate.Measurement):
46
  """This measurement returns the total number of words and the number of unique words
47
  in the input string(s)."""
48
 
49
- def _info(self):
 
 
 
50
  return evaluate.MeasurementInfo(
51
  # This is the description that will appear on the modules page.
52
  module_type="measurement",
53
  description=_DESCRIPTION,
54
  citation=_CITATION,
55
  inputs_description=_KWARGS_DESCRIPTION,
 
56
  features=datasets.Features(
57
  {
58
  "data": datasets.Value("string"),
@@ -60,9 +75,9 @@ class WordCount(evaluate.Measurement):
60
  ),
61
  )
62
 
63
- def _compute(self, data, max_vocab=None):
64
  """Returns the number of unique words in the input data"""
65
- count_vectorizer = CountVectorizer(max_features=max_vocab)
66
  document_matrix = count_vectorizer.fit_transform(data)
67
  word_count = document_matrix.sum()
68
  unique_words = document_matrix.shape[1]
 
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
 
15
+ from dataclasses import dataclass
16
+ from typing import Optional
17
+
18
  import datasets
19
  from sklearn.feature_extraction.text import CountVectorizer
20
 
 
44
  _CITATION = ""
45
 
46
 
47
+ @dataclass
48
+ class WordCount(evaluate.info.Config):
49
+
50
+ name: str = "default"
51
+
52
+ max_vocab: Optional[int] = None
53
+
54
+
55
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
56
  class WordCount(evaluate.Measurement):
57
  """This measurement returns the total number of words and the number of unique words
58
  in the input string(s)."""
59
 
60
+ CONFIG_CLASS = WordCount
61
+ ALLOWED_CONFIG_NAMES = ["default"]
62
+
63
+ def _info(self, config):
64
  return evaluate.MeasurementInfo(
65
  # This is the description that will appear on the modules page.
66
  module_type="measurement",
67
  description=_DESCRIPTION,
68
  citation=_CITATION,
69
  inputs_description=_KWARGS_DESCRIPTION,
70
+ config=config,
71
  features=datasets.Features(
72
  {
73
  "data": datasets.Value("string"),
 
75
  ),
76
  )
77
 
78
+ def _compute(self, data):
79
  """Returns the number of unique words in the input data"""
80
+ count_vectorizer = CountVectorizer(max_features=self.config.max_vocab)
81
  document_matrix = count_vectorizer.fit_transform(data)
82
  word_count = document_matrix.sum()
83
  unique_words = document_matrix.shape[1]