lvwerra (HF staff) committed
Commit 34d5c3d
1 Parent(s): 2a6f858

Update Space (evaluate main: e51c679b)

Files changed (4)
  1. README.md +53 -6
  2. app.py +6 -0
  3. exact_match.py +65 -0
  4. requirements.txt +4 -0
README.md CHANGED
@@ -1,12 +1,59 @@
  ---
- title: Exact_match
- emoji:
- colorFrom: red
- colorTo: yellow
+ title: Exact Match
+ emoji: 🤗
+ colorFrom: blue
+ colorTo: green
  sdk: gradio
- sdk_version: 3.0.9
+ sdk_version: 3.0.2
  app_file: app.py
  pinned: false
+ tags:
+ - evaluate
+ - comparison
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
+
+ # Comparison Card for Exact Match
+
+ ## Comparison description
+
+ Given two sets of model predictions, the exact match score is 1 for an example if the two predictions are identical, and 0 otherwise. The overall exact match score is the average over all examples.
+
+ - **Example 1**: The exact match score is 1.0 if prediction 1 is [0, 1] and prediction 2 is [0, 1].
+ - **Example 2**: The exact match score is 0.0 if prediction 1 is [0, 1] and prediction 2 is [1, 0].
+ - **Example 3**: The exact match score is 0.5 if prediction 1 is [0, 1] and prediction 2 is [1, 1].
+
+ ## How to use
+
+ At minimum, this comparison takes as input two lists of predictions, one from each model:
+ ```python
+ >>> exact_match = evaluate.load("exact_match", module_type="comparison")
+ >>> results = exact_match.compute(predictions1=[0, 1, 1], predictions2=[1, 1, 1])
+ >>> print(results)
+ {'exact_match': 0.6666666666666666}
+ ```
+
+ ## Output values
+
+ Returns a float between 0.0 and 1.0 inclusive.
+
+ ## Examples
+
+ ```python
+ >>> exact_match = evaluate.load("exact_match", module_type="comparison")
+ >>> results = exact_match.compute(predictions1=[1, 1, 1], predictions2=[1, 1, 1])
+ >>> print(results)
+ {'exact_match': 1.0}
+ ```
+
+ ```python
+ >>> exact_match = evaluate.load("exact_match", module_type="comparison")
+ >>> results = exact_match.compute(predictions1=[0, 1, 1], predictions2=[1, 1, 1])
+ >>> print(results)
+ {'exact_match': 0.6666666666666666}
+ ```
+
+
+ ## Limitations and bias
+
+ ## Citations
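
As a quick check of the scores quoted in the comparison card above, here is a minimal sketch (illustrative only, not part of this commit; `exact_match_rate` is a hypothetical helper name) that reproduces the three worked examples with NumPy:

```python
import numpy as np

def exact_match_rate(predictions1, predictions2):
    # Mean of the element-wise equality between the two prediction lists.
    return float(np.mean(np.asarray(predictions1) == np.asarray(predictions2)))

print(exact_match_rate([0, 1], [0, 1]))  # 1.0 (Example 1)
print(exact_match_rate([0, 1], [1, 0]))  # 0.0 (Example 2)
print(exact_match_rate([0, 1], [1, 1]))  # 0.5 (Example 3)
```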
app.py ADDED
@@ -0,0 +1,6 @@
+ import evaluate
+ from evaluate.utils import launch_gradio_widget
+
+
+ module = evaluate.load("exact_match", module_type="comparison")
+ launch_gradio_widget(module)
exact_match.py ADDED
@@ -0,0 +1,65 @@
+ # Copyright 2022 The HuggingFace Evaluate Authors
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Exact match test for model comparison."""
+
+ import datasets
+ import numpy as np
+
+ import evaluate
+
+
+ _DESCRIPTION = """
+ Returns the rate at which the predictions of one model exactly match those of another model.
+ """
+
+
+ _KWARGS_DESCRIPTION = """
+ Args:
+     predictions1 (`list` of `int`): Predicted labels for model 1.
+     predictions2 (`list` of `int`): Predicted labels for model 2.
+
+ Returns:
+     exact_match (`float`): The rate at which the two models' predictions match. Possible values are between 0.0 and 1.0, inclusive.
+
+ Examples:
+     >>> exact_match = evaluate.load("exact_match", module_type="comparison")
+     >>> results = exact_match.compute(predictions1=[1, 1, 1], predictions2=[1, 1, 1])
+     >>> print(results)
+     {'exact_match': 1.0}
+ """
+
+
+ _CITATION = """
+ """
+
+
+ @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
+ class ExactMatch(evaluate.EvaluationModule):
+     def _info(self):
+         return evaluate.EvaluationModuleInfo(
+             module_type="comparison",
+             description=_DESCRIPTION,
+             citation=_CITATION,
+             inputs_description=_KWARGS_DESCRIPTION,
+             features=datasets.Features(
+                 {
+                     "predictions1": datasets.Value("int64"),
+                     "predictions2": datasets.Value("int64"),
+                 }
+             ),
+         )
+
+     def _compute(self, predictions1, predictions2):
+         score_list = np.asarray(predictions1) == np.asarray(predictions2)
+         return {"exact_match": np.mean(score_list)}
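
A small sketch (illustrative only, not part of the commit) of the element-wise comparison that `_compute` relies on: with plain Python lists, `==` compares the two lists as whole objects and yields a single bool, whereas NumPy arrays are compared element by element, which is what the averaged score needs:

```python
import numpy as np

preds1, preds2 = [0, 1, 1], [1, 1, 1]

print(preds1 == preds2)                                    # False: whole-list comparison
print(np.asarray(preds1) == np.asarray(preds2))            # [False  True  True]: element-wise
print(np.mean(np.asarray(preds1) == np.asarray(preds2)))   # 0.6666666666666666
```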
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ # TODO: fix github to release
+ git+https://github.com/huggingface/evaluate.git@505123230059f9605da8951880eddc9d1fbf4278
+ datasets~=2.0
+ scipy