File size: 4,571 Bytes
0583214
9d0bbec
0583214
 
 
 
 
 
 
 
 
 
9d0bbec
0583214
 
 
 
 
 
 
 
 
 
 
 
 
9d0bbec
0583214
 
 
 
 
 
 
 
 
2ce0e0f
0583214
 
 
 
 
9d0bbec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0583214
9d0bbec
0583214
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ce0e0f
0583214
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9d0bbec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0583214
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import html
import itertools
from io import StringIO

import gradio as gr
import pandas as pd
import spacy


# Load the 'en_core_web_sm' spaCy pipeline once at import time; colorize()
# uses it to obtain tokens, lemmas and coarse part-of-speech tags.
nlp = spacy.load('en_core_web_sm')

# Shared shape of every highlight span: only the rgba() background differs.
# The doubled braces keep a literal "{t}" placeholder in the result so each
# template can later be filled with ``.format(t=...)``.
_HIGHLIGHT = '<span style="background-color: rgba({rgba})">{{t}}</span>'

HTML_RED = _HIGHLIGHT.format(rgba='255, 0, 0, 0.2')
HTML_GRN = _HIGHLIGHT.format(rgba='0, 255, 0, 0.3')
HTML_YLW = _HIGHLIGHT.format(rgba='255, 255, 0, 0.3')
HTML_BLU = _HIGHLIGHT.format(rgba='0, 0, 255, 0.2')
# Uncoloured wrapper with the same "{t}" placeholder as the templates above.
HTML_PLN = '<span>{t}</span>'

# Stylesheet handed to gr.Blocks: padded cells and collapsed 1px borders.
TABLE_CSS = '''
th, td {
    padding: 4px;
}
table, th, td {
  border: 1px solid black;
  border-collapse: collapse;

}
'''


# Coarse part-of-speech tags whose tokens get a coloured background.
_CONTENT_POS = frozenset({'NOUN', 'PROPN', 'VERB'})
# CSV columns holding the gold summary and the two model outputs, in the
# order in which they are rendered as table cells.
_SUMMARY_COLUMNS = ('target summary', 'model summary A', 'model summary B')


def _cell_text(value):
    """Return a CSV cell as a string, treating a missing value as ''."""
    return '' if pd.isna(value) else str(value)


def _lemmas(tokens):
    """Return the set of lower-cased lemmas of *tokens*."""
    return {token.lemma_.lower() for token in tokens}


def _render_doc(doc, lemma_pool, hit_template, miss_template):
    """Render *doc* as HTML, colouring nouns, proper nouns and verbs.

    A coloured token uses ``hit_template`` when its lower-cased lemma is in
    ``lemma_pool`` and ``miss_template`` otherwise; every other token is
    wrapped in ``HTML_PLN``. Token text is HTML-escaped before insertion.
    """
    parts = []
    for token in doc:
        if token.pos_ not in _CONTENT_POS:
            template = HTML_PLN
        elif token.lemma_.lower() in lemma_pool:
            template = hit_template
        else:
            template = miss_template
        # Escape the text: it comes from an uploaded file and would
        # otherwise be interpreted as markup by the browser.
        parts.append(template.format(t=html.escape(token.text)))
        parts.append(token.whitespace_)
    return ''.join(parts)


def colorize(file_obj):
    """Build an HTML table comparing gold and generated summaries.

    The uploaded CSV must provide the columns ``example_id``,
    ``target summary``, ``model summary A`` and ``model summary B``.
    Everything before the first occurrence of ``example_id`` in the file is
    discarded before parsing.

    Nouns, proper nouns and verbs are highlighted by lemma overlap:
    in the gold summary blue means the lemma also occurs in at least one of
    the two generated summaries and yellow means it occurs in neither; in a
    generated summary green means the lemma occurs in the gold summary and
    red means it does not.

    Args:
        file_obj: Object whose ``name`` attribute is the path of the CSV
            file to read.

    Returns:
        An HTML ``<table>`` string with one header row and one row per CSV
        record (id, gold, model A, model B).

    Raises:
        ValueError: If no file was supplied or the file contains no
            ``example_id`` header.
    """
    if file_obj is None:
        raise ValueError('no file was uploaded')

    # Read with an explicit encoding so the result does not depend on the
    # platform's default locale.
    with open(file_obj.name, 'r', encoding='utf-8') as f:
        raw = f.read()

    header_start = raw.find('example_id')
    if header_start < 0:
        # str.find returns -1 here; slicing with it would silently keep
        # only the last character of the file.
        raise ValueError(
            "no 'example_id' column header found in the uploaded file"
        )
    data = pd.read_csv(StringIO(raw[header_start:]))

    lines = [
        '<table>',
        '<tr>',
        '<td><b>id</b></td>',
        '<td><b>Gold</b></td>',
        '<td><b>Model A</b></td>',
        '<td><b>Model B</b></td>',
        '</tr>',
    ]
    for _, record in data.iterrows():
        gold, gen_a, gen_b = nlp.pipe(
            [_cell_text(record[column]) for column in _SUMMARY_COLUMNS]
        )
        gold_lemmas = _lemmas(gold)
        generated_lemmas = _lemmas(itertools.chain(gen_a, gen_b))

        cells = [
            html.escape(str(record['example_id'])),
            _render_doc(gold, generated_lemmas, HTML_BLU, HTML_YLW),
            _render_doc(gen_a, gold_lemmas, HTML_GRN, HTML_RED),
            _render_doc(gen_b, gold_lemmas, HTML_GRN, HTML_RED),
        ]
        lines.append('<tr>')
        lines.extend('<td>{}</td>'.format(cell) for cell in cells)
        lines.append('</tr>')
    lines.append('</table>')

    return '\n'.join(lines)


def main():
    """Assemble the two-tab Gradio interface and launch it.

    The "Upload" tab holds the CSV file input and the Run button; the
    "Visualization" tab shows a colour legend followed by the HTML table
    that ``colorize`` produces when Run is clicked.
    """
    # (span template, colour name, meaning) for each legend bullet.
    legend_entries = (
        (HTML_RED, 'Red', 'word is in generated, but not in gold.'),
        (HTML_GRN, 'Green', 'word is in generated summary and gold.'),
        (HTML_YLW, 'Yellow', 'word is in gold, but not in generated.'),
        (HTML_BLU, 'Blue', 'word is in gold and in generated.'),
    )
    legend_html = ''.join((
        "<b>Explanation of colors:</b>",
        "<br><ul>",
        *(
            "<li><b>{}</b>: {}</li>".format(
                template.format(t=colour), meaning
            )
            for template, colour, meaning in legend_entries
        ),
        "</ul>",
        "<br>",
        # The final sentence used to end with a closing </b> that had no
        # matching opening tag; it is dropped to keep the markup balanced.
        "<b>Important</b>: Only nouns, verbs and proper ",
        "nouns are colored.",
    ))

    with gr.Blocks(css=TABLE_CSS) as demo:
        gr.Markdown(
            "After uploading, click Run and switch to the Visualization tab."
        )
        with gr.Tabs():
            with gr.TabItem("Upload"):
                data = gr.File(
                    label='upload csv with Annotations', type='file'
                )
                run = gr.Button(label='Run')
            with gr.TabItem("Visualization"):
                gr.HTML(legend_html)
                viz = gr.HTML(label='Upload a csv file to start.')
        # Clicking Run feeds the uploaded file to colorize and shows the
        # returned HTML in the visualization component.
        run.click(colorize, data, viz)

    demo.launch()


# Start the demo only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()