alireza_mshi committed on
Commit
2623164
1 Parent(s): 83b41ef

add app.py

Browse files
__pycache__/app.cpython-38.pyc ADDED
Binary file (4.67 kB). View file
 
__pycache__/app.cpython-39.pyc ADDED
Binary file (4.66 kB). View file
 
app.py CHANGED
@@ -1,7 +1,6 @@
1
- import gradio as gr
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- iface.launch()
 
1
# Entry script: load the `alirezamsh/rqugescore` evaluation module and serve it
# through the Gradio widget helper shipped with `evaluate`.
import evaluate
from evaluate.utils import launch_gradio_widget

module = evaluate.load("alirezamsh/rqugescore")
launch_gradio_widget(module)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ numpy
4
+ sentencepiece
5
+ rquge
rqugescore.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MIT License
2
+
3
+ # Copyright (c) 2022 Alireza Mohammadshahi
4
+
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+ """ RQUGE metric. """
24
+
25
+ import functools
26
+ from contextlib import contextmanager
27
+
28
+ from rquge_score.scorer import RQUGE
29
+ import datasets
30
+ from packaging import version
31
+
32
+ import evaluate
33
+
34
+
35
@contextmanager
def filter_logging_context():
    """Temporarily silence one specific message on a transformers logger.

    While the context is active, records sent to the
    ``transformers.modeling_utils`` logger whose message template contains
    "This IS expected if you are initializing" are dropped. Every other
    record passes through untouched. The filter is always removed on exit,
    including when the body raises.
    """
    # Function-scope import keeps this block self-contained. Loggers live in
    # the process-wide stdlib registry, so looking the logger up by name here
    # reaches the same object the library logs to.
    import logging

    def filter_log(record):
        # `record.msg` may be any object, not only a string; coerce it so the
        # substring test cannot raise TypeError from inside the logging call.
        return "This IS expected if you are initializing" not in str(record.msg)

    logger = logging.getLogger("transformers.modeling_utils")
    logger.addFilter(filter_log)
    try:
        yield
    finally:
        logger.removeFilter(filter_log)
46
+
47
+
48
# BibTeX entry surfaced through the metric's `citation` field.
_CITATION = """\
@misc{mohammadshahi2022rquge,
title={RQUGE: Reference-Free Metric for Evaluating Question Generation by Answering the Question},
author={Alireza Mohammadshahi and Thomas Scialom and Majid Yazdani and Pouya Yanki and Angela Fan and James Henderson and Marzieh Saeidi},
year={2022},
eprint={2211.01482},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
"""

# Short human-readable summary surfaced through the metric's `description` field.
_DESCRIPTION = """\
RQUGE, a Reference-free QUestion Generation Evaluation metric that can compute the quality of
the candidate question without requiring the access to the reference question.
Given the corresponding context and answer span, our metric calculates the acceptability score
by applying a general question-answering module, followed by a span scorer. You can find
more detail in the paper (https://arxiv.org/abs/2211.01482) (ACL2023).
"""

# Usage documentation. It must mirror the `_compute` signature and the keys of
# the dictionary `_compute` returns ("mean_score", "instance_score").
# NOTE(review): the expected value in the example is carried over unchanged and
# has not been re-run here — confirm before relying on it as a doctest.
_KWARGS_DESCRIPTION = """
RQUGE Metric to compute the acceptability of generated question, given the context and answer.
Args:
    generated_questions (list of str): Generated/candidate questions.
    contexts (list of str): List of contexts.
    answers (list of str): List of reference answers.
    qa_model (str): Path to the QA model (local path or HF model hub), default: 'allenai/unifiedqa-v2-t5-large-1363200'
    sp_model (str): Path of span scorer model (local path or HF model hub), default: 'alirezamsh/quip-512-mocha'
    verbose (bool): Print the average RQUGE score, default: False
    device (str): Device on which the QA and span scorer models are allocated, default: 'cpu'
Returns:
    mean_score: Average RQUGE score over all instances.
    instance_score: List of RQUGE scores, one per (context, question, answer) instance.
Examples:
    >>> generated_questions = ["how is the weather?"]
    >>> contexts = ["the weather is sunny"]
    >>> answers = ["sunny"]
    >>> rqugescore = evaluate.load("alirezamsh/rqugescore")
    >>> results = rqugescore.compute(generated_questions=generated_questions, contexts=contexts, answers=answers)
    >>> print([round(v, 2) for v in results["instance_score"]])
    [5.0]
"""
93
+
94
+
95
# `evaluate` metric wrapper around the RQUGE scorer: it scores each
# (context, generated question, answer) triple and reports both the
# per-instance scores and their mean.
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class RQUGEScore(evaluate.Metric):
    def _info(self):
        """Return the metric metadata: description, citation, input schema and URLs.

        The input schema declares three string columns:
        ``generated_questions``, ``contexts`` and ``answers``.
        """
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            homepage="https://github.com/alirezamshi/RQUGE",
            inputs_description=_KWARGS_DESCRIPTION,
            features=[
                datasets.Features(
                    {
                        "generated_questions": datasets.Value("string", id="sequence"),
                        "contexts": datasets.Value("string", id="sequence"),
                        "answers": datasets.Value("string", id="sequence"),
                    }
                ),
            ],
            codebase_urls=["https://github.com/alirezamshi/RQUGE"],
            reference_urls=[
                "https://github.com/alirezamshi/RQUGE",
                "https://arxiv.org/abs/2211.01482",
            ],
        )

    def _compute(
        self,
        generated_questions,
        contexts,
        answers,
        qa_model="allenai/unifiedqa-v2-t5-large-1363200",
        sp_model="alirezamsh/quip-512-mocha",
        verbose=False,
        device="cpu",
    ):
        """Score every (context, question, answer) triple with RQUGE.

        Args:
            generated_questions: Candidate questions, one per instance.
            contexts: Contexts, aligned with ``generated_questions``.
            answers: Answers, aligned with ``generated_questions``.
            qa_model: Passed to ``RQUGE`` as ``qa_model_path``.
            sp_model: Passed to ``RQUGE`` as ``sp_scorer_path``.
            verbose: When true, print the average score.
            device: Passed to ``RQUGE`` as ``device``.

        Returns:
            A dict with ``"mean_score"`` (average of the instance scores, or
            ``0.0`` when there are no instances) and ``"instance_score"``
            (list of per-instance scores, in input order).
        """
        # Reuse the scorer across calls as long as the configuration is the
        # same, instead of constructing a fresh RQUGE object on every call
        # (presumably the expensive part, since it is given both model paths).
        model_key = (qa_model, sp_model, device)
        cached = getattr(self, "_cached_rquge", None)
        if cached is None or cached[0] != model_key:
            cached = (
                model_key,
                RQUGE(sp_scorer_path=sp_model, qa_model_path=qa_model, device=device),
            )
            self._cached_rquge = cached
        rquge_model = cached[1]

        instance_scores = [
            rquge_model.scorer(context, question, answer)
            for context, question, answer in zip(contexts, generated_questions, answers)
        ]

        # Guard the empty case: dividing by len([]) would raise ZeroDivisionError.
        if instance_scores:
            mean_score = sum(instance_scores) / len(instance_scores)
        else:
            mean_score = 0.0

        if verbose:
            print(f'Average RQUGE score is {mean_score}')

        return {
            "mean_score": mean_score,
            "instance_score": instance_scores,
        }