Martin Dočekal committed on
Commit
732e363
1 Parent(s): d38d998

init. code for ROUGERaw wrapper

Browse files
Files changed (3) hide show
  1. README.md +87 -6
  2. app.py +12 -0
  3. rouge_raw.py +231 -0
README.md CHANGED
@@ -1,12 +1,93 @@
1
  ---
2
- title: Rouge Raw
3
- emoji: 🏢
4
- colorFrom: yellow
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 4.16.0
8
  app_file: app.py
9
  pinned: false
 
 
 
 
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: RougeRaw
3
+ emoji: 🤗
4
+ colorFrom: blue
5
+ colorTo: red
6
  sdk: gradio
7
+ sdk_version: 3.19.1
8
  app_file: app.py
9
  pinned: false
10
+ tags:
11
+ - evaluate
12
+ - metric
13
+ description: >-
14
+ ROUGE RAW is a language-agnostic variant of ROUGE without a stemmer, stop words and synonyms.
15
+ This is a wrapper around the original http://hdl.handle.net/11234/1-2615 script.
16
  ---
17
 
18
+ # Metric Card for RougeRaw
19
+
20
+
21
+ ## Metric Description
22
+
23
+ ROUGE RAW is a language-agnostic variant of ROUGE without a stemmer, stop words and synonyms.
24
+ This is a wrapper around the original http://hdl.handle.net/11234/1-2615 script.
25
+
26
+
27
+ ## How to Use
28
+
29
+
30
+ ```python
31
+ >>> rougeraw = evaluate.load('CZLC/rouge_raw')
32
+ >>> predictions = ["the cat is on the mat", "hello there"]
33
+ >>> references = ["the cat is on the mat", "hello there"]
34
+ >>> results = rougeraw.compute(predictions=predictions, references=references)
35
+ >>> print(results)
36
+ {'rougeraw1_precision': 1.0, 'rougeraw1_recall': 1.0, 'rougeraw1_fmeasure': 1.0, 'rougeraw2_precision': 1.0, 'rougeraw2_recall': 1.0, 'rougeraw2_fmeasure': 1.0, 'rougerawl_precision': 1.0, 'rougerawl_recall': 1.0, 'rougerawl_fmeasure': 1.0}
37
+ ```
38
+
39
+
40
+ ### Inputs
41
+ predictions: list of predictions to evaluate. Each prediction should be a string with tokens separated by spaces.
42
+ references: list of reference for each prediction. Each reference should be a string with tokens separated by space
43
+
44
+ ### Output Values
45
+ - rougeraw1_precision
46
+ - rougeraw1_recall
47
+ - rougeraw1_fmeasure
48
+ - rougeraw2_precision
49
+ - rougeraw2_recall
50
+ - rougeraw2_fmeasure
51
+ - rougerawl_precision
52
+ - rougerawl_recall
53
+ - rougerawl_fmeasure
54
+
55
+ Output Example(s):
56
+ ```python
57
+ {'rougeraw1_precision': 1.0, 'rougeraw1_recall': 1.0, 'rougeraw1_fmeasure': 1.0, 'rougeraw2_precision': 1.0, 'rougeraw2_recall': 1.0, 'rougeraw2_fmeasure': 1.0, 'rougerawl_precision': 1.0, 'rougerawl_recall': 1.0, 'rougerawl_fmeasure': 1.0}
58
+ ```
59
+
60
+ This metric outputs a dictionary, containing the scores.
61
+
62
+ ## Citation(s)
63
+ ```bibtex
64
+ @inproceedings{straka-etal-2018-sumeczech,
65
+ title = "{S}ume{C}zech: Large {C}zech News-Based Summarization Dataset",
66
+ author = "Straka, Milan and
67
+ Mediankin, Nikita and
68
+ Kocmi, Tom and
69
+ {\v{Z}}abokrtsk{\'y}, Zden{\v{e}}k and
70
+ Hude{\v{c}}ek, Vojt{\v{e}}ch and
71
+ Haji{\v{c}}, Jan",
72
+ editor = "Calzolari, Nicoletta and
73
+ Choukri, Khalid and
74
+ Cieri, Christopher and
75
+ Declerck, Thierry and
76
+ Goggi, Sara and
77
+ Hasida, Koiti and
78
+ Isahara, Hitoshi and
79
+ Maegaard, Bente and
80
+ Mariani, Joseph and
81
+ Mazo, H{\'e}l{\`e}ne and
82
+ Moreno, Asuncion and
83
+ Odijk, Jan and
84
+ Piperidis, Stelios and
85
+ Tokunaga, Takenobu",
86
+ booktitle = "Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)",
87
+ month = may,
88
+ year = "2018",
89
+ address = "Miyazaki, Japan",
90
+ publisher = "European Language Resources Association (ELRA)",
91
+ url = "https://aclanthology.org/L18-1551",
92
+ }
93
+ ```
app.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: UTF-8 -*-
"""
Created on 02.02.24

Gradio Space entry point: loads the RougeRaw metric and serves the
standard ``evaluate`` demo widget for it.

:author: Martin Dočekal
"""
import evaluate
from evaluate.utils import launch_gradio_widget


# Bug fix: the widget previously loaded the unrelated built-in "accuracy"
# metric. This Space must expose the local RougeRaw wrapper; the hub id
# matches the one used in the README usage example.
module = evaluate.load("CZLC/rouge_raw")
launch_gradio_widget(module)
rouge_raw.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: UTF-8 -*-
2
+ """
3
+ Created on 02.02.24
4
+ Module for raw ROUGE score calculation from:
5
+ @inproceedings{straka-etal-2018-sumeczech,
6
+ title = "{S}ume{C}zech: Large {C}zech News-Based Summarization Dataset",
7
+ author = "Straka, Milan and
8
+ Mediankin, Nikita and
9
+ Kocmi, Tom and
10
+ {\v{Z}}abokrtsk{\'y}, Zden{\v{e}}k and
11
+ Hude{\v{c}}ek, Vojt{\v{e}}ch and
12
+ Haji{\v{c}}, Jan",
13
+ editor = "Calzolari, Nicoletta and
14
+ Choukri, Khalid and
15
+ Cieri, Christopher and
16
+ Declerck, Thierry and
17
+ Goggi, Sara and
18
+ Hasida, Koiti and
19
+ Isahara, Hitoshi and
20
+ Maegaard, Bente and
21
+ Mariani, Joseph and
22
+ Mazo, H{\'e}l{\`e}ne and
23
+ Moreno, Asuncion and
24
+ Odijk, Jan and
25
+ Piperidis, Stelios and
26
+ Tokunaga, Takenobu",
27
+ booktitle = "Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)",
28
+ month = may,
29
+ year = "2018",
30
+ address = "Miyazaki, Japan",
31
+ publisher = "European Language Resources Association (ELRA)",
32
+ url = "https://aclanthology.org/L18-1551",
33
+ }
34
+
35
+
36
+ :author: Martin Dočekal
37
+ """
38
+
39
+ import re
40
+ from typing import Sequence
41
+
42
+ import datasets
43
+ import evaluate
44
+
45
+
46
class RougeRaw:
    """
    Original implementation of the ROUGERaw metric.
    Computes RougeRAW-1, RougeRAW-2 and RougeRAW-L.

    ROUGE RAW is a language-agnostic ROUGE variant: no stemming, no stop
    words, no synonym matching — only lowercased surface tokens are compared.
    """

    class FScore:
        """Precision/recall/F1 triple computed from raw match counts."""

        def __init__(self, correct, gold, system):
            # correct: matched units; gold/system: unit totals on each side.
            # Guards return 0.0 instead of dividing by zero on empty sides.
            self.p = correct / system if system else 0.
            self.r = correct / gold if gold else 0.
            self.f = 2 * correct / (system + gold) if system + gold else 0.

    def _rouge_n(self, n, gold_words, system_words):
        """Compute the Rouge-n overlap F-score for the given token lists."""

        def n_grams(n, words):
            # Multiset of n-grams, encoded as tab-joined token strings.
            ngrams = {}
            total = 0
            for i in range(len(words) - n + 1):
                ngram = "\t".join(words[i:i + n])
                ngrams[ngram] = 1 + ngrams.get(ngram, 0)
                total += 1
            return ngrams, total

        gold_ngrams, gold_total = n_grams(n, gold_words)
        system_ngrams, system_total = n_grams(n, system_words)

        # Clipped overlap: each n-gram counts at most as often as it appears
        # on either side.
        intersection = 0
        for ngram in system_ngrams:
            intersection += min(system_ngrams[ngram], gold_ngrams.get(ngram, 0))

        return self.FScore(intersection, gold_total, system_total)

    def _rouge_l(self, gold_words, system_words):
        """Compute Rouge-L (longest common subsequence) F-score."""
        # DP table; lcs[r][s] is the LCS length of gold_words[:r+1] and
        # system_words[:s+1].
        lcs = [[0] * len(system_words) for _ in gold_words]
        for r in range(len(gold_words)):
            for s in range(len(system_words)):
                if gold_words[r] == system_words[s]:
                    lcs[r][s] = 1 + (lcs[r - 1][s - 1] if r and s else 0)
                lcs[r][s] = max(lcs[r][s], lcs[r - 1][s] if r else 0)
                lcs[r][s] = max(lcs[r][s], lcs[r][s - 1] if s else 0)

        return self.FScore(lcs[-1][-1], len(gold_words), len(system_words))

    def _tokenize(self, text):
        """Split text into tokens at word boundaries and whitespace.

        Bug fix: the original passed ``re.UNICODE`` as the fourth
        *positional* argument of ``re.sub``, which is ``count``
        (``re.UNICODE == 32``), so only the first 32 word boundaries were
        expanded and long texts tokenized incorrectly. ``re.UNICODE`` is the
        default for str patterns in Python 3, so no flag is needed at all.
        """
        return re.sub(r"\s+", " ", re.sub(r"\b", " ", text)).strip().split(" ")

    def document(self, gold, system):
        """Compute RougeRAW-1, RougeRAW-2, RougeRAW-L for a document pair.

        Each document is a string; comparison is case-insensitive.
        Returns a dict with keys "1", "2" and "L" mapping to FScore objects.
        """

        assert isinstance(gold, str) and isinstance(system, str), "Expected string arguments"

        lc_gold_words = [word.lower() for word in self._tokenize(gold)]
        lc_system_words = [word.lower() for word in self._tokenize(system)]

        return {
            "1": self._rouge_n(1, lc_gold_words, lc_system_words),
            "2": self._rouge_n(2, lc_gold_words, lc_system_words),
            "L": self._rouge_l(lc_gold_words, lc_system_words),
        }

    def corpus(self, gold, system):
        """Compute macro-averaged RougeRAW-1, RougeRAW-2, RougeRAW-L.

        ``gold`` and ``system`` are equally long lists of document strings.
        Per-document scores are averaged; empty corpora yield all zeros.
        """

        assert isinstance(gold, list) and isinstance(system, list), "Expected list arguments"
        assert len(gold) == len(system), "Given corpora should be of the same length"

        rouge = {key: self.FScore(0, 0, 0) for key in ["1", "2", "L"]}

        if len(gold):
            for gold_document, system_document in zip(gold, system):
                for key, value in self.document(gold_document, system_document).items():
                    rouge[key].p += value.p
                    rouge[key].r += value.r
                    rouge[key].f += value.f

            # Macro average over documents.
            for key in rouge:
                rouge[key].p /= len(gold)
                rouge[key].r /= len(gold)
                rouge[key].f /= len(gold)

        return rouge
134
+
135
+
136
+ _CITATION = """\
137
+ @inproceedings{straka-etal-2018-sumeczech,
138
+ title = "{S}ume{C}zech: Large {C}zech News-Based Summarization Dataset",
139
+ author = "Straka, Milan and
140
+ Mediankin, Nikita and
141
+ Kocmi, Tom and
142
+ {\v{Z}}abokrtsk{\'y}, Zden{\v{e}}k and
143
+ Hude{\v{c}}ek, Vojt{\v{e}}ch and
144
+ Haji{\v{c}}, Jan",
145
+ editor = "Calzolari, Nicoletta and
146
+ Choukri, Khalid and
147
+ Cieri, Christopher and
148
+ Declerck, Thierry and
149
+ Goggi, Sara and
150
+ Hasida, Koiti and
151
+ Isahara, Hitoshi and
152
+ Maegaard, Bente and
153
+ Mariani, Joseph and
154
+ Mazo, H{\'e}l{\`e}ne and
155
+ Moreno, Asuncion and
156
+ Odijk, Jan and
157
+ Piperidis, Stelios and
158
+ Tokunaga, Takenobu",
159
+ booktitle = "Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)",
160
+ month = may,
161
+ year = "2018",
162
+ address = "Miyazaki, Japan",
163
+ publisher = "European Language Resources Association (ELRA)",
164
+ url = "https://aclanthology.org/L18-1551",
165
+ }
166
+ """
167
+
168
+ _DESCRIPTION = """\
169
+ ROUGE RAW is language-agnostic variant of ROUGE without stemmer, stop words and synonymas.
170
+ This is a wrapper around the original http://hdl.handle.net/11234/1-2615 script.
171
+ """
172
+
173
+ _KWARGS_DESCRIPTION = """
174
+ ROCUE RAW metric for list of predictions and references.
175
+ Args:
176
+ predictions: list of predictions to evaluate. Each prediction should be a string with tokens separated by spaces.
177
+ references: list of reference for each prediction. Each reference should be a string with tokens separated by spaces.
178
+ Returns:
179
+ rougeraw1_precision
180
+ rougeraw1_recall
181
+ rougeraw1_fmeasure
182
+ rougeraw2_precision
183
+ rougeraw2_recall
184
+ rougeraw2_fmeasure
185
+ rougerawl_precision
186
+ rougerawl_recall
187
+ rougerawl_fmeasure
188
+ Examples:
189
+ >>> rougeraw = evaluate.load('CZLC/rouge_raw')
190
+ >>> predictions = ["the cat is on the mat", "hello there"]
191
+ >>> references = ["the cat is on the mat", "hello there"]
192
+ >>> results = rougeraw.compute(predictions=predictions, references=references)
193
+ >>> print(results)
194
+ {'rougeraw1_precision': 1.0, 'rougeraw1_recall': 1.0, 'rougeraw1_fmeasure': 1.0, 'rougeraw2_precision': 1.0, 'rougeraw2_recall': 1.0, 'rougeraw2_fmeasure': 1.0, 'rougerawl_precision': 1.0, 'rougerawl_recall': 1.0, 'rougerawl_fmeasure': 1.0}
195
+ """
196
+
197
+
198
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class Rouge(evaluate.Metric):
    """Hugging Face ``evaluate`` wrapper around the :class:`RougeRaw` scorer."""

    def _info(self):
        """Declare the metric's interface for the evaluate framework."""
        features = datasets.Features(
            {
                "predictions": datasets.Value("string", id="sequence"),
                "references": datasets.Value("string", id="sequence"),
            }
        )
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=[features],
            reference_urls=["http://hdl.handle.net/11234/1-2615"],
        )

    def _compute(self, predictions: Sequence[str], references: Sequence[str]):
        """Score predictions against references with RougeRaw.

        References serve as the gold corpus, predictions as the system
        output; the result is a flat dict of nine precision/recall/F1 floats.
        """
        scores = RougeRaw().corpus(references, predictions)
        result = {}
        for key, prefix in (("1", "rougeraw1"), ("2", "rougeraw2"), ("L", "rougerawl")):
            fscore = scores[key]
            result[prefix + "_precision"] = fscore.p
            result[prefix + "_recall"] = fscore.r
            result[prefix + "_fmeasure"] = fscore.f
        return result
231
+