unnati committed on
Commit
361f384
·
1 Parent(s): 1ce35f7

Add documentation fix + tests

Browse files

- Edit _KWARGS_DESCRIPTION to describe the expected input & exception
- Add examples to above doc string
- Remove citations, url (consider adding later)
- Add tests - all permutations of ['A', 'B','C']

Files changed (3) hide show
  1. app.py +1 -1
  2. kendall_tau_distance.py +31 -28
  3. tests.py +107 -11
app.py CHANGED
@@ -3,4 +3,4 @@ from evaluate.utils import launch_gradio_widget
3
 
4
 
5
  module = evaluate.load("unnati/kendall_tau_distance")
6
- launch_gradio_widget(module)
 
3
 
4
 
5
  module = evaluate.load("unnati/kendall_tau_distance")
6
+ launch_gradio_widget(module)
kendall_tau_distance.py CHANGED
@@ -28,39 +28,40 @@ year={2020}
28
 
29
  # TODO: Add description of the module here
30
  _DESCRIPTION = """\
31
- This new module is designed to solve this great ML task and is crafted with a lot of care.
 
 
32
  """
33
 
34
 
35
  # TODO: Add description of the arguments of the module here
36
  _KWARGS_DESCRIPTION = """
37
- Calculates how good are predictions given some references, using certain scores
38
  Args:
39
  predictions: list of predictions to score. Each predictions
40
- should be a string with tokens separated by spaces.
41
- references: list of reference for each prediction. Each
42
- reference should be a string with tokens separated by spaces.
43
  Returns:
44
  kendall_tau_distance: Kendall's tau distance between predictions and references
45
  normalized_kendall_tau_distance: Kendall's tau distance between predictions and references normalized by the number of pairs
 
 
 
 
46
  Examples:
47
  Examples should be written in doctest format, and should illustrate how
48
  to use the function.
49
 
50
  >>> kendall_tau_distance = evaluate.load("kendall_tau_distance")
51
- >>> results = kendall_tau_distance.compute(references=[0, 1], predictions=[0, 1])
52
  >>> print(results)
53
- {'kendall_tau_distance': 0, 'normalized_kendall_tau_distance': 0}
54
  """
55
 
56
- # TODO: Define external resources urls if needed
57
- BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
58
-
59
 
60
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
61
  class kendalltaudistance(evaluate.Metric):
62
- """TODO: Short description of my evaluation module."""
63
-
64
  def _info(self):
65
  # TODO: Specifies the evaluate.EvaluationModuleInfo object
66
  return evaluate.MetricInfo(
@@ -70,15 +71,12 @@ class kendalltaudistance(evaluate.Metric):
70
  citation=_CITATION,
71
  inputs_description=_KWARGS_DESCRIPTION,
72
  # This defines the format of each prediction and reference
73
- features=datasets.Features({
74
- 'predictions': datasets.Value('int64'),
75
- 'references': datasets.Value('int64'),
76
- }),
77
- # Homepage of the module for documentation
78
- homepage="http://module.homepage",
79
- # Additional links to the codebase or references
80
- codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
81
- reference_urls=["http://path.to.reference.url/new_module"]
82
  )
83
 
84
  def _compute(self, predictions, references):
@@ -86,19 +84,24 @@ class kendalltaudistance(evaluate.Metric):
86
  # TODO: Compute the different scores of the module
87
 
88
  n = len(predictions)
89
- assert n == len(references), "The number of predictions and references should be the same"
90
 
 
 
 
 
91
  n_discordant_pairs = 0
92
 
93
  for i in range(len(predictions)):
94
  j = references.index(predictions[i])
95
- n_discordant_pairs += len(set(predictions[:i]).intersection(set(references[j:]))) + len(set(predictions[i+1:]).intersection(set(references[:j])))
 
 
96
 
97
  n_discordant_pairs = n_discordant_pairs / 2
98
-
99
  num_pairs = n * (n - 1) / 2
100
-
101
  return {
102
- 'kendall_tau_distance': n_discordant_pairs,
103
- 'normalized_kendall_tau_distance': n_discordant_pairs / num_pairs,
104
- }
 
28
 
29
  # TODO: Add description of the module here
30
  _DESCRIPTION = """\
31
+ This new module is designed to calculate Kendall's tau distance between predictions and references.
32
+ It is also known as bubble sort distance.
33
+ It is equivalent to the number of adjacent swaps required to convert predictions to references.
34
  """
35
 
36
 
37
  # TODO: Add description of the arguments of the module here
38
  _KWARGS_DESCRIPTION = """
39
+ Calculates how good predictions are given some references, using Kendall's tau distance.
40
  Args:
41
  predictions: list of predictions to score. Each predictions
42
+ should be a string or tokens or int. The predictions should be unique.
43
+ references: list of reference for each prediction. Each reference
44
+ should be a string or tokens or int. The values in predictions and references should be the same.
45
  Returns:
46
  kendall_tau_distance: Kendall's tau distance between predictions and references
47
  normalized_kendall_tau_distance: Kendall's tau distance between predictions and references normalized by the number of pairs
48
+
49
+ Exceptions:
50
+ AssertionError: If the predictions are not unique or if the values in predictions and references are not the same
51
+
52
  Examples:
53
  Examples should be written in doctest format, and should illustrate how
54
  to use the function.
55
 
56
  >>> kendall_tau_distance = evaluate.load("kendall_tau_distance")
57
+ >>> results = kendall_tau_distance.compute(references=[0, 1], predictions=[1, 0])
58
  >>> print(results)
59
+ {'kendall_tau_distance': 1.0, 'normalized_kendall_tau_distance': 1.0}
60
  """
61
 
 
 
 
62
 
63
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
64
  class kendalltaudistance(evaluate.Metric):
 
 
65
  def _info(self):
66
  # TODO: Specifies the evaluate.EvaluationModuleInfo object
67
  return evaluate.MetricInfo(
 
71
  citation=_CITATION,
72
  inputs_description=_KWARGS_DESCRIPTION,
73
  # This defines the format of each prediction and reference
74
+ features=datasets.Features(
75
+ {
76
+ "predictions": datasets.Value("int64"),
77
+ "references": datasets.Value("int64"),
78
+ }
79
+ )
 
 
 
80
  )
81
 
82
  def _compute(self, predictions, references):
 
84
  # TODO: Compute the different scores of the module
85
 
86
  n = len(predictions)
 
87
 
88
+ assert len(set(predictions)) == n, "The predictions should be unique"
89
+ assert set(predictions) == set(
90
+ references
91
+ ), "The values in predictions and references should be the same"
92
  n_discordant_pairs = 0
93
 
94
  for i in range(len(predictions)):
95
  j = references.index(predictions[i])
96
+ n_discordant_pairs += len(
97
+ set(predictions[:i]).intersection(set(references[j:]))
98
+ ) + len(set(predictions[i + 1 :]).intersection(set(references[:j])))
99
 
100
  n_discordant_pairs = n_discordant_pairs / 2
101
+
102
  num_pairs = n * (n - 1) / 2
103
+
104
  return {
105
+ "kendall_tau_distance": n_discordant_pairs,
106
+ "normalized_kendall_tau_distance": n_discordant_pairs / num_pairs,
107
+ }
tests.py CHANGED
@@ -1,17 +1,113 @@
1
  test_cases = [
2
  {
3
- "predictions": [0, 0],
4
- "references": [1, 1],
5
- "result": {"metric_score": 0}
 
 
 
6
  },
7
  {
8
- "predictions": [1, 1],
9
- "references": [1, 1],
10
- "result": {"metric_score": 1}
 
 
 
11
  },
12
  {
13
- "predictions": [1, 0],
14
- "references": [1, 1],
15
- "result": {"metric_score": 0.5}
16
- }
17
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  test_cases = [
2
  {
3
+ "predictions": ("A", "B", "C"),
4
+ "references": ("A", "C", "B"),
5
+ "result": {
6
+ "kendall_tau_distance": 1.0,
7
+ "normalized_kendall_tau_distance": 0.3333333333333333,
8
+ },
9
  },
10
  {
11
+ "predictions": ("A", "B", "C"),
12
+ "references": ("B", "A", "C"),
13
+ "result": {
14
+ "kendall_tau_distance": 1.0,
15
+ "normalized_kendall_tau_distance": 0.3333333333333333,
16
+ },
17
  },
18
  {
19
+ "predictions": ("A", "B", "C"),
20
+ "references": ("B", "C", "A"),
21
+ "result": {
22
+ "kendall_tau_distance": 2.0,
23
+ "normalized_kendall_tau_distance": 0.6666666666666666,
24
+ },
25
+ },
26
+ {
27
+ "predictions": ("A", "B", "C"),
28
+ "references": ("C", "A", "B"),
29
+ "result": {
30
+ "kendall_tau_distance": 2.0,
31
+ "normalized_kendall_tau_distance": 0.6666666666666666,
32
+ },
33
+ },
34
+ {
35
+ "predictions": ("A", "B", "C"),
36
+ "references": ("C", "B", "A"),
37
+ "result": {"kendall_tau_distance": 3.0, "normalized_kendall_tau_distance": 1.0},
38
+ },
39
+ {
40
+ "predictions": ("A", "C", "B"),
41
+ "references": ("B", "A", "C"),
42
+ "result": {
43
+ "kendall_tau_distance": 2.0,
44
+ "normalized_kendall_tau_distance": 0.6666666666666666,
45
+ },
46
+ },
47
+ {
48
+ "predictions": ("A", "C", "B"),
49
+ "references": ("B", "C", "A"),
50
+ "result": {"kendall_tau_distance": 3.0, "normalized_kendall_tau_distance": 1.0},
51
+ },
52
+ {
53
+ "predictions": ("A", "C", "B"),
54
+ "references": ("C", "A", "B"),
55
+ "result": {
56
+ "kendall_tau_distance": 1.0,
57
+ "normalized_kendall_tau_distance": 0.3333333333333333,
58
+ },
59
+ },
60
+ {
61
+ "predictions": ("A", "C", "B"),
62
+ "references": ("C", "B", "A"),
63
+ "result": {
64
+ "kendall_tau_distance": 2.0,
65
+ "normalized_kendall_tau_distance": 0.6666666666666666,
66
+ },
67
+ },
68
+ {
69
+ "predictions": ("B", "A", "C"),
70
+ "references": ("B", "C", "A"),
71
+ "result": {
72
+ "kendall_tau_distance": 1.0,
73
+ "normalized_kendall_tau_distance": 0.3333333333333333,
74
+ },
75
+ },
76
+ {
77
+ "predictions": ("B", "A", "C"),
78
+ "references": ("C", "A", "B"),
79
+ "result": {"kendall_tau_distance": 3.0, "normalized_kendall_tau_distance": 1.0},
80
+ },
81
+ {
82
+ "predictions": ("B", "A", "C"),
83
+ "references": ("C", "B", "A"),
84
+ "result": {
85
+ "kendall_tau_distance": 2.0,
86
+ "normalized_kendall_tau_distance": 0.6666666666666666,
87
+ },
88
+ },
89
+ {
90
+ "predictions": ("B", "C", "A"),
91
+ "references": ("C", "A", "B"),
92
+ "result": {
93
+ "kendall_tau_distance": 2.0,
94
+ "normalized_kendall_tau_distance": 0.6666666666666666,
95
+ },
96
+ },
97
+ {
98
+ "predictions": ("B", "C", "A"),
99
+ "references": ("C", "B", "A"),
100
+ "result": {
101
+ "kendall_tau_distance": 1.0,
102
+ "normalized_kendall_tau_distance": 0.3333333333333333,
103
+ },
104
+ },
105
+ {
106
+ "predictions": ("C", "A", "B"),
107
+ "references": ("C", "B", "A"),
108
+ "result": {
109
+ "kendall_tau_distance": 1.0,
110
+ "normalized_kendall_tau_distance": 0.3333333333333333,
111
+ },
112
+ },
113
+ ]