Spaces:
Sleeping
Sleeping
Update Space (evaluate main: c447fc8e)
Browse files
- requirements.txt +1 -1
- word_length.py +2 -20
requirements.txt
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
-
git+https://github.com/huggingface/evaluate.git@
|
2 |
nltk~=3.7
|
|
|
1 |
+
git+https://github.com/huggingface/evaluate.git@c447fc8eda9c62af501bfdc6988919571050d950
|
2 |
nltk~=3.7
|
word_length.py
CHANGED
@@ -12,9 +12,7 @@
|
|
12 |
# See the License for the specific language governing permissions and
|
13 |
# limitations under the License.
|
14 |
|
15 |
-
from dataclasses import dataclass
|
16 |
from statistics import mean
|
17 |
-
from typing import Callable, Optional
|
18 |
|
19 |
import datasets
|
20 |
from nltk import word_tokenize
|
@@ -54,22 +52,11 @@ year={2020}
|
|
54 |
"""
|
55 |
|
56 |
|
57 |
-
@dataclass
|
58 |
-
class WordLengthConfig(evaluate.info.Config):
|
59 |
-
|
60 |
-
name: str = "default"
|
61 |
-
|
62 |
-
tokenizer: Optional[Callable] = None
|
63 |
-
|
64 |
-
|
65 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
66 |
class WordLength(evaluate.Measurement):
|
67 |
"""This measurement returns the average number of words in the input string(s)."""
|
68 |
|
69 |
-
|
70 |
-
ALLOWED_CONFIG_NAMES = ["default"]
|
71 |
-
|
72 |
-
def _info(self, config):
|
73 |
# TODO: Specifies the evaluate.MeasurementInfo object
|
74 |
return evaluate.MeasurementInfo(
|
75 |
# This is the description that will appear on the modules page.
|
@@ -77,7 +64,6 @@ class WordLength(evaluate.Measurement):
|
|
77 |
description=_DESCRIPTION,
|
78 |
citation=_CITATION,
|
79 |
inputs_description=_KWARGS_DESCRIPTION,
|
80 |
-
config=config,
|
81 |
# This defines the format of each prediction and reference
|
82 |
features=datasets.Features(
|
83 |
{
|
@@ -91,12 +77,8 @@ class WordLength(evaluate.Measurement):
|
|
91 |
|
92 |
nltk.download("punkt")
|
93 |
|
94 |
-
def _compute(self, data):
|
95 |
"""Returns the average word length of the input data"""
|
96 |
-
if self.config.tokenizer is None:
|
97 |
-
tokenizer = word_tokenize
|
98 |
-
else:
|
99 |
-
tokenizer = self.config.tokenizer
|
100 |
lengths = [len(tokenizer(d)) for d in data]
|
101 |
average_length = mean(lengths)
|
102 |
return {"average_word_length": average_length}
|
|
|
12 |
# See the License for the specific language governing permissions and
|
13 |
# limitations under the License.
|
14 |
|
|
|
15 |
from statistics import mean
|
|
|
16 |
|
17 |
import datasets
|
18 |
from nltk import word_tokenize
|
|
|
52 |
"""
|
53 |
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
56 |
class WordLength(evaluate.Measurement):
|
57 |
"""This measurement returns the average number of words in the input string(s)."""
|
58 |
|
59 |
+
def _info(self):
|
|
|
|
|
|
|
60 |
# TODO: Specifies the evaluate.MeasurementInfo object
|
61 |
return evaluate.MeasurementInfo(
|
62 |
# This is the description that will appear on the modules page.
|
|
|
64 |
description=_DESCRIPTION,
|
65 |
citation=_CITATION,
|
66 |
inputs_description=_KWARGS_DESCRIPTION,
|
|
|
67 |
# This defines the format of each prediction and reference
|
68 |
features=datasets.Features(
|
69 |
{
|
|
|
77 |
|
78 |
nltk.download("punkt")
|
79 |
|
80 |
+
def _compute(self, data, tokenizer=word_tokenize):
|
81 |
"""Returns the average word length of the input data"""
|
|
|
|
|
|
|
|
|
82 |
lengths = [len(tokenizer(d)) for d in data]
|
83 |
average_length = mean(lengths)
|
84 |
return {"average_word_length": average_length}
|