Spaces:
Sleeping
Sleeping
DongfuJiang
committed on
Commit
•
84a5d01
1
Parent(s):
ec1da13
update
Browse files
update
update
- .gitignore +2 -0
- app.py +20 -6
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
/__pycache__
|
2 |
+
/gradio_cached_examples
|
app.py
CHANGED
@@ -6,8 +6,7 @@ from typing import List
|
|
6 |
import utils
|
7 |
|
8 |
|
9 |
-
DESCRIPTIONS = """
|
10 |
-
|
11 |
We present ***TIGERScore***, a **T**rained metric that follows **I**nstruction **G**uidance to perform **E**xplainable, and **R**eference-free evaluation over a wide spectrum of text generation tasks. Different from other automatic evaluation methods that only provide arcane scores, TIGERScore is guided by the natural language instruction to provide error analysis to pinpoint the mistakes in the generated text.
|
12 |
|
13 |
### [**Website**](https://tiger-ai-lab.github.io/TIGERScore/) [**Paper**](https://arxiv.org/abs/2310.00752) [**Code**](https://github.com/TIGER-AI-Lab/TIGERScore) [**TIGERScore-7B**](https://huggingface.co/TIGER-Lab/TIGERScore-7B-V1.0) [**TIGERScore-13B**](https://huggingface.co/TIGER-Lab/TIGERScore-13B-V1.0)
|
@@ -37,15 +36,22 @@ def tigerscore(task, input_context, generation_instruction, hypo_output, max_new
|
|
37 |
def get_examples(task, inst_textbox, input_textbox, hypo_output_textbox):
    """Forward one example's values to the input widgets.

    Returns a 4-tuple: a ``gr.Dropdown.update`` whose value is ``task``,
    followed by the instruction, input-context and hypothesis-output
    values exactly as they were received.
    """
    # NOTE(review): presumably the callback for the examples widget --
    # confirm against the gr.Examples call, which is not fully visible here.
    dropdown_update = gr.Dropdown.update(value=task)
    return dropdown_update, inst_textbox, input_textbox, hypo_output_textbox
|
39 |
|
|
|
|
|
|
|
40 |
## initialize the model
|
41 |
print("Loading TIGERScore model...")
|
42 |
utils.load_tigerscore("7b")
|
43 |
|
44 |
with gr.Blocks(theme='gradio/soft') as demo:
|
45 |
-
|
46 |
-
gr.Markdown("## TIGERScore
|
|
|
|
|
|
|
47 |
|
48 |
-
|
|
|
49 |
inst_textbox = gr.Textbox(lines=1, label="Instruction", placeholder="Enter instruction here", show_label=True)
|
50 |
input_textbox = gr.Textbox(lines=4, label="Input Context", placeholder="Enter input context here", show_label=True)
|
51 |
hypo_output_textbox = gr.Textbox(lines=4, label="Hypothesis Output", placeholder="Enter hypothesis output to be evaluated here", show_label=True)
|
@@ -86,6 +92,12 @@ with gr.Blocks(theme='gradio/soft') as demo:
|
|
86 |
inputs=[tasks_dropdown, input_textbox, inst_textbox, hypo_output_textbox, max_new_tokens, temperature, top_p],
|
87 |
outputs=evaluation_output_textbox,
|
88 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
|
90 |
batch_examples = gr.Examples(
|
91 |
examples=EXAMPLES,
|
@@ -97,11 +109,13 @@ with gr.Blocks(theme='gradio/soft') as demo:
|
|
97 |
)
|
98 |
|
99 |
citations = gr.Markdown("""## Citation
|
|
|
100 |
@article{jiang2023TIGERScore,
|
101 |
title={TIGERScore: Towards Building Explainable Metric for All Text Generation Tasks},
|
102 |
author={Dongfu Jiang, Yishan Li, Ge Zhang, Wenhao Huang, Bill Yuchen Lin, Wenhu Chen},
|
103 |
journal={arXiv preprint arXiv:2310.00752},
|
104 |
year={2023}
|
105 |
-
}
|
|
|
106 |
|
107 |
demo.queue(max_size=20).launch()
|
|
|
6 |
import utils
|
7 |
|
8 |
|
9 |
+
DESCRIPTIONS = """
|
|
|
10 |
We present ***TIGERScore***, a **T**rained metric that follows **I**nstruction **G**uidance to perform **E**xplainable, and **R**eference-free evaluation over a wide spectrum of text generation tasks. Different from other automatic evaluation methods that only provide arcane scores, TIGERScore is guided by the natural language instruction to provide error analysis to pinpoint the mistakes in the generated text.
|
11 |
|
12 |
### [**Website**](https://tiger-ai-lab.github.io/TIGERScore/) [**Paper**](https://arxiv.org/abs/2310.00752) [**Code**](https://github.com/TIGER-AI-Lab/TIGERScore) [**TIGERScore-7B**](https://huggingface.co/TIGER-Lab/TIGERScore-7B-V1.0) [**TIGERScore-13B**](https://huggingface.co/TIGER-Lab/TIGERScore-13B-V1.0)
|
|
|
36 |
def get_examples(task, inst_textbox, input_textbox, hypo_output_textbox):
    """Forward one example's values to the input widgets.

    Returns a 4-tuple: a ``gr.Dropdown.update`` whose value is ``task``,
    followed by the instruction, input-context and hypothesis-output
    values exactly as they were received.
    """
    # NOTE(review): presumably the callback for the examples widget --
    # confirm against the gr.Examples call, which is not fully visible here.
    dropdown_update = gr.Dropdown.update(value=task)
    return dropdown_update, inst_textbox, input_textbox, hypo_output_textbox
|
38 |
|
39 |
+
def clear_all(task, inst_textbox, input_textbox, hypo_output_textbox):
    """Blank the three text fields while keeping the current task.

    Registered as the ``clear_button.click`` handler. The three textbox
    arguments are accepted because the click wiring passes them as
    inputs, but their values are ignored.

    Returns a 4-tuple: a ``gr.Dropdown.update`` whose value is ``task``,
    followed by three empty strings for the instruction, input-context
    and hypothesis-output textboxes.
    """
    blank = ""
    return gr.Dropdown.update(value=task), blank, blank, blank
|
41 |
+
|
42 |
## initialize the model
|
43 |
print("Loading TIGERScore model...")
|
44 |
utils.load_tigerscore("7b")
|
45 |
|
46 |
with gr.Blocks(theme='gradio/soft') as demo:
|
47 |
+
|
48 |
+
gr.Markdown("## 🐯 TIGERScore Demo")
|
49 |
+
with gr.Row():
|
50 |
+
gr.Markdown(DESCRIPTIONS)
|
51 |
+
gr.Image("https://jdf-prog.github.io/assets/img/publication_preview/tigerscore_preview.png")
|
52 |
|
53 |
+
gr.Markdown("## TIGERScore Inputs")
|
54 |
+
tasks_dropdown = gr.Dropdown(label="Task", choices=utils.tasks, value="translation", show_label=True, allow_custom_value=True)
|
55 |
inst_textbox = gr.Textbox(lines=1, label="Instruction", placeholder="Enter instruction here", show_label=True)
|
56 |
input_textbox = gr.Textbox(lines=4, label="Input Context", placeholder="Enter input context here", show_label=True)
|
57 |
hypo_output_textbox = gr.Textbox(lines=4, label="Hypothesis Output", placeholder="Enter hypothesis output to be evaluated here", show_label=True)
|
|
|
92 |
inputs=[tasks_dropdown, input_textbox, inst_textbox, hypo_output_textbox, max_new_tokens, temperature, top_p],
|
93 |
outputs=evaluation_output_textbox,
|
94 |
)
|
95 |
+
|
96 |
+
clear_button.click(
|
97 |
+
fn=clear_all,
|
98 |
+
inputs=[tasks_dropdown, inst_textbox, input_textbox, hypo_output_textbox],
|
99 |
+
outputs=[tasks_dropdown, inst_textbox, input_textbox, hypo_output_textbox],
|
100 |
+
)
|
101 |
|
102 |
batch_examples = gr.Examples(
|
103 |
examples=EXAMPLES,
|
|
|
109 |
)
|
110 |
|
111 |
citations = gr.Markdown("""## Citation
|
112 |
+
```txt
|
113 |
@article{jiang2023TIGERScore,
|
114 |
title={TIGERScore: Towards Building Explainable Metric for All Text Generation Tasks},
|
115 |
author={Dongfu Jiang, Yishan Li, Ge Zhang, Wenhao Huang, Bill Yuchen Lin, Wenhu Chen},
|
116 |
journal={arXiv preprint arXiv:2310.00752},
|
117 |
year={2023}
|
118 |
+
}
|
119 |
+
```""")
|
120 |
|
121 |
demo.queue(max_size=20).launch()
|