berkatil committed on
Commit
c01271c
1 Parent(s): 23afbaa

implementation is added

Files changed (3)
  1. mrr.py +48 -45
  2. requirements.txt +2 -1
  3. tests.py +0 -17
mrr.py CHANGED
@@ -11,58 +11,59 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
- """TODO: Add a description here."""
+ """Mean reciprocal rank (MRR) metric."""

  import evaluate
  import datasets
+ import json
+ from ranx import Qrels, Run
+ from ranx import evaluate as ran_evaluate


- # TODO: Add BibTeX citation
  _CITATION = """\
- @InProceedings{huggingface:module,
- title = {A great new module},
- authors={huggingface, Inc.},
- year={2020}
+ @inproceedings{ranx,
+     author = {Elias Bassani},
+     title = {ranx: {A} Blazing-Fast Python Library for Ranking Evaluation and Comparison},
+     booktitle = {{ECIR} {(2)}},
+     series = {Lecture Notes in Computer Science},
+     volume = {13186},
+     pages = {259--264},
+     publisher = {Springer},
+     year = {2022},
+     doi = {10.1007/978-3-030-99739-7\_30}
  }
  """

- # TODO: Add description of the module here
  _DESCRIPTION = """\
- This new module is designed to solve this great ML task and is crafted with a lot of care.
+ This is the mean reciprocal rank (MRR) metric for retrieval systems.
+ It is the multiplicative inverse of the rank of the first relevant retrieved document: 1 for first place, 1/2 for second place, 1/3 for third place, and so on.
+ See https://amenra.github.io/ranx/metrics/#mean-reciprocal-rank for details.
  """


- # TODO: Add description of the arguments of the module here
  _KWARGS_DESCRIPTION = """
- Calculates how good are predictions given some references, using certain scores
  Args:
-     predictions: list of predictions to score. Each predictions
-         should be a string with tokens separated by spaces.
-     references: list of reference for each prediction. Each
-         reference should be a string with tokens separated by spaces.
+     predictions: list of JSON strings, one per query, each encoding a dictionary that maps the query id to a
+         dictionary of document ids and the relevance scores produced by the model for that query.
+     references: list of JSON strings, one per query, each encoding a dictionary that maps the query id to a
+         dictionary of the relevant document ids and their relevance judgments.
+     k: `int`, optional, defaults to None; if given, mrr@k is computed instead of mrr.
  Returns:
-     accuracy: description of the first score,
-     another_score: description of the second score,
+     mrr (`float`): mean reciprocal rank. Minimum possible value is 0. Maximum possible value is 1.0.
  Examples:
-     Examples should be written in doctest format, and should illustrate how
-     to use the function.
-
-     >>> my_new_module = evaluate.load("my_new_module")
-     >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1])
+     >>> mrr_metric = evaluate.load("mrr")
+     >>> references = [json.dumps({"q_1": {"d_1": 1, "d_2": 2}}),
+     ...               json.dumps({"q_2": {"d_2": 1, "d_3": 2, "d_5": 3}})]
+     >>> predictions = [json.dumps({"q_1": {"d_1": 0.8, "d_2": 0.9}}),
+     ...                json.dumps({"q_2": {"d_2": 0.9, "d_1": 0.8, "d_5": 0.7, "d_3": 0.3}})]
+     >>> results = mrr_metric.compute(references=references, predictions=predictions)
      >>> print(results)
-     {'accuracy': 1.0}
+     {'mrr': 1.0}
  """

- # TODO: Define external resources urls if needed
- BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
-
-
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
  class mrr(evaluate.Metric):
-     """TODO: Short description of my evaluation module."""
-
      def _info(self):
-         # TODO: Specifies the evaluate.EvaluationModuleInfo object
          return evaluate.MetricInfo(
              # This is the description that will appear on the modules page.
              module_type="metric",
@@ -71,25 +72,27 @@ class mrr(evaluate.Metric):
              inputs_description=_KWARGS_DESCRIPTION,
              # This defines the format of each prediction and reference
              features=datasets.Features({
-                 'predictions': datasets.Value('int64'),
-                 'references': datasets.Value('int64'),
+                 'predictions': datasets.Value("string"),
+                 'references': datasets.Value("string"),
              }),
              # Homepage of the module for documentation
-             homepage="http://module.homepage",
-             # Additional links to the codebase or references
-             codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
-             reference_urls=["http://path.to.reference.url/new_module"]
+             reference_urls=["https://amenra.github.io/ranx/"]
          )

-     def _download_and_prepare(self, dl_manager):
-         """Optional: download external resources useful to compute the scores"""
-         # TODO: Download external resources if needed
-         pass
-
-     def _compute(self, predictions, references):
+     def _compute(self, predictions, references, k=None):
          """Returns the scores"""
-         # TODO: Compute the different scores of the module
-         accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
+         # Each prediction/reference is a JSON string encoding {query_id: {doc_id: score}};
+         # merge them into single run/qrels dictionaries keyed by query id.
+         preds = {}
+         refs = {}
+         for pred in predictions:
+             preds = preds | json.loads(pred)
+         for ref in references:
+             refs = refs | json.loads(ref)
+
+         run = Run(preds)
+         qrels = Qrels(refs)
+         metric = "mrr" if k is None else f"mrr@{k}"
+         mrr_score = ran_evaluate(qrels, run, metric)
          return {
-             "accuracy": accuracy,
+             "mrr": mrr_score,
          }
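Note on the fixed doctest output: with the example inputs, the first-ranked document is relevant for both q_1 and q_2, so the expected result is {'mrr': 1.0}. The sketch below reproduces this by calling ranx directly, which is essentially what _compute does after merging the per-query JSON payloads; the extra query q_3 is a hypothetical addition used only to show a reciprocal rank below 1.

# A minimal sketch of what _compute does under the hood, using ranx directly.
# q_1 and q_2 come from the docstring example; q_3 is a hypothetical extra query.
from ranx import Qrels, Run, evaluate as ran_evaluate

qrels = Qrels({
    "q_1": {"d_1": 1, "d_2": 2},
    "q_2": {"d_2": 1, "d_3": 2, "d_5": 3},
    "q_3": {"d_7": 1},                                         # only d_7 is relevant
})
run = Run({
    "q_1": {"d_1": 0.8, "d_2": 0.9},                           # d_2 ranked first and relevant -> RR = 1
    "q_2": {"d_2": 0.9, "d_1": 0.8, "d_5": 0.7, "d_3": 0.3},   # d_2 ranked first and relevant -> RR = 1
    "q_3": {"d_4": 0.9, "d_7": 0.8},                           # relevant d_7 ranked second -> RR = 1/2
})

print(ran_evaluate(qrels, run, "mrr"))    # (1 + 1 + 0.5) / 3 ~ 0.833
print(ran_evaluate(qrels, run, "mrr@1"))  # q_3 has no relevant doc in the top 1 -> (1 + 1 + 0) / 3 ~ 0.667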
requirements.txt CHANGED
@@ -1 +1,2 @@
- git+https://github.com/huggingface/evaluate@main
+ git+https://github.com/huggingface/evaluate@main
+ ranx==0.3.19
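ranx is now a runtime dependency, so it must be installed (per requirements.txt) before the metric can be used. A minimal usage sketch follows; the Hub repo path below is an assumption (the docstring simply calls evaluate.load("mrr")).

# Loading and using the metric; assumes ranx==0.3.19 is installed and that the
# module resolves at this Hub path (the exact path is an assumption).
import json
import evaluate

mrr_metric = evaluate.load("berkatil/mrr")  # hypothetical repo id
references = [json.dumps({"q_1": {"d_1": 1, "d_2": 2}}),
              json.dumps({"q_2": {"d_2": 1, "d_3": 2, "d_5": 3}})]
predictions = [json.dumps({"q_1": {"d_1": 0.8, "d_2": 0.9}}),
               json.dumps({"q_2": {"d_2": 0.9, "d_1": 0.8, "d_5": 0.7, "d_3": 0.3}})]
print(mrr_metric.compute(predictions=predictions, references=references))       # {'mrr': 1.0}
print(mrr_metric.compute(predictions=predictions, references=references, k=1))  # mrr@1, also 1.0 here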
tests.py DELETED
@@ -1,17 +0,0 @@
- test_cases = [
-     {
-         "predictions": [0, 0],
-         "references": [1, 1],
-         "result": {"metric_score": 0}
-     },
-     {
-         "predictions": [1, 1],
-         "references": [1, 1],
-         "result": {"metric_score": 1}
-     },
-     {
-         "predictions": [1, 0],
-         "references": [1, 1],
-         "result": {"metric_score": 0.5}
-     }
- ]
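The deleted file contained the template's integer-based test cases, which no longer match the JSON-string inputs expected by the new _compute. A hypothetical replacement in the same test_cases style (not part of this commit) could look like:

# Hypothetical replacement test cases matching the new JSON-string input format;
# illustrative only, not part of this commit.
import json

test_cases = [
    {
        # the first-ranked document (d_2, score 0.9) is relevant -> mrr = 1.0
        "predictions": [json.dumps({"q_1": {"d_1": 0.8, "d_2": 0.9}})],
        "references": [json.dumps({"q_1": {"d_2": 1}})],
        "result": {"mrr": 1.0}
    },
    {
        # the relevant document (d_2) is ranked second -> mrr = 0.5
        "predictions": [json.dumps({"q_1": {"d_1": 0.9, "d_2": 0.8}})],
        "references": [json.dumps({"q_1": {"d_2": 1}})],
        "result": {"mrr": 0.5}
    }
]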