tlkh committed on
Commit
10e7b70
1 Parent(s): f4bf233

Initial commit

Browse files
Files changed (2) hide show
  1. app.py +117 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import spacy
3
+ from paraphrase_metrics import metrics as pm
4
+ import time
5
+ import difflib
6
+
7
# Set the page title used for this Streamlit app.
+ st.set_page_config(page_title="TextDiff Visualizer")
8
+
9
def render_single_para(paragraph, segment_info, prefix="a", other="b", gap=" "):
    """Render one paragraph as HTML with matched and unmatched text highlighted.

    Every entry of ``segment_info`` yields two ``<span>`` elements: the text
    lying between the previous matched block and this one (coral, i.e.
    "different") followed by the matched block itself (green, i.e. "same").
    A ``<script>`` fragment emitted alongside defines one hover handler per
    span; the handler recolours the span and the element with the same index
    under the ``other`` id prefix.

    Args:
        paragraph: Text to render. It is HTML-escaped before being inserted,
            so ``<``, ``>`` and ``&`` in the input show up literally instead
            of being interpreted as markup.
        segment_info: Sequence of ``[start, size]`` pairs locating the matched
            blocks inside ``paragraph``, in ascending order.
        prefix: Id prefix for the spans generated for this paragraph.
        other: Id prefix of the paragraph this one is compared against.
        gap: Separator placed between the generated HTML fragments.

    Returns:
        The paragraph as a ``<p>...</p>`` HTML string.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from html import escape

    def span(colour, span_id, text):
        # One highlighted span; hovering switches it to cyan and back.
        return (
            '<span style="background-color:' + colour
            + ';color:black;border-radius:2px;" onmouseover="chbg_' + span_id
            + "('cyan')\" onmouseout=\"chbg_" + span_id
            + "('" + colour + "')\" id='" + span_id + "'>"
            + escape(text, quote=False) + "</span>"
        )

    def hover_handler(span_id, other_id):
        # JS function that recolours a span together with its counterpart.
        return (
            "function chbg_" + span_id + "(colour){\n"
            "document.getElementById('" + span_id + "').style.backgroundColor=colour;\n"
            "document.getElementById('" + other_id + "').style.backgroundColor=colour;\n"
            "}\n"
        )

    segments = ["<p>"]
    prev_end = 0  # end offset of the previous matched block
    for i, m in enumerate(segment_info):
        start, end = m[0], m[0] + m[1]
        diff_id = prefix + "_" + str(i) + "_1"
        same_id = prefix + "_" + str(i) + "_2"
        script = (
            "<script>\n"
            + hover_handler(diff_id, other + "_" + str(i) + "_1")
            + hover_handler(same_id, other + "_" + str(i) + "_2")
            + "</script>"
        )
        segments += [
            script,
            span("LightCoral", diff_id, paragraph[prev_end:start]),
            span("LightGreen", same_id, paragraph[start:end]),
        ]
        prev_end = end
    segments.append("</p>")
    return gap.join(segments)
43
+
44
def render_diff(a_parapgraph, b_parapgraph, gap=" ", prefix=None):
    """Build a two-column HTML table highlighting where two texts match.

    The texts are aligned case-insensitively with ``difflib.SequenceMatcher``
    and each one is rendered by ``render_single_para`` using the matched
    blocks, so that corresponding spans in both columns share an index.

    Args:
        a_parapgraph: First text (left column).
        b_parapgraph: Second text (right column).
        gap: Separator forwarded to ``render_single_para``.
        prefix: Id prefix for the generated elements. When ``None``, the
            current Unix time in whole seconds is used.

    Returns:
        An HTML ``<table>`` string with one cell per text.
    """
    if prefix is None:
        prefix = str(int(time.time()))

    # Match on per-character lowercase keys instead of lowercasing the whole
    # string: str.lower() can change a string's length (e.g. "İ"), which would
    # shift the block offsets relative to the original text sliced below.
    a_keys = [ch.lower() for ch in a_parapgraph]
    b_keys = [ch.lower() for ch in b_parapgraph]
    matcher = difflib.SequenceMatcher(None, a_keys, b_keys, autojunk=False)
    blocks = matcher.get_matching_blocks()

    a_html_paragraph = render_single_para(
        a_parapgraph,
        [[blk.a, blk.size] for blk in blocks],
        gap=gap,
        prefix=prefix + "_a",
        other=prefix + "_b",
    )
    b_html_paragraph = render_single_para(
        b_parapgraph,
        [[blk.b, blk.size] for blk in blocks],
        gap=gap,
        prefix=prefix + "_b",
        other=prefix + "_a",
    )

    cell_open = '<td style="border: 1px solid silver;padding:0.4em;border-radius:4px;">'
    # "padding:1px" replaces the original "padding=1px", which is not a valid
    # CSS declaration.
    return (
        '<table style="width:100%;font-family:sans-serif;font-size:large;">'
        '<tr style="background-color:white;padding:1px;">\n'
        + cell_open + a_html_paragraph + "</td>\n"
        + cell_open + b_html_paragraph + "</td>\n"
        + "</tr></table>"
    )
61
+
62
# ``st.cache`` is the legacy Streamlit caching decorator; newer releases
# provide ``st.cache_resource`` for shared objects such as models. Prefer the
# new decorator when it exists and fall back to the legacy one otherwise.
_cache_model = getattr(st, "cache_resource", None) or st.cache(allow_output_mutation=True)


@_cache_model
def load_model():
    """Load the ``en_core_web_sm`` spaCy pipeline once and reuse it across reruns.

    Returns:
        The loaded spaCy pipeline object.
    """
    return spacy.load("en_core_web_sm")
66
+
67
# Main page flow: choose the two texts, show the highlighted diff, then the
# paraphrase metrics.
nlp = load_model()

st.markdown("### TextDiff Visualizer")

mode = st.selectbox("Input", ["Custom", "Examples"])

if mode == "Custom":
    col1, col2 = st.columns(2)
    with col1:
        text_A = st.text_area("Text 1", value="The findings are being published July 1st in the Annals of Internal Medicine.")
    with col2:
        text_B = st.text_area("Text 2", value="The findings are published in the July 1st issue of the Annals of Internal Medicine.")
else:
    # Each example holds both texts, separated by " ; ".
    examples = st.radio("Examples", [
        "The top rate will go to 4.45 percent for all residents with taxable incomes above $500,000. ; For residents with incomes above $500,000, the income-tax rate will increase to 4.45 percent.",
        "However, prosecutors have declined to take criminal action against guards, though Fine said his inquiry is not finished. ; Prosecutors have declined to take criminal action against corrections officers, although Fine said his inquiry was not finished.",
        "In trading on the New York Stock Exchange, Kraft shares fell 25 cents to close at $32.30. ; Kraft's shares fell 25 cents to close at $32.30 yesterday on the New York Stock Exchange.",
        "An attempt last month in the Senate to keep the fund open for another year fell flat. ; An attempt to keep the fund open for another year fell flat in the Senate last month.",
        "Prisoners were tortured and executed -- their ears and scalps severed for souvenirs. ; They frequently tortured and shot prisoners, severing ears and scalps for souvenirs.",
        "American has laid off 6,500 of its flight attendants since Dec. 31. ; Since October 2001, American has laid off 6,149 flight attendants.",
    ])
    text_A, text_B = examples.split(" ; ")

st.markdown("Visualization")

html_viz = render_diff(text_A, text_B)

st.components.v1.html(html_viz)

# Edit distance and BLEU are computed on the raw strings.
dist = round(pm.edit_distance(text_A, text_B), 2)
bleu = round(pm.self_bleu(text_A, text_B), 2)
# WPD and LD are computed on the spaCy-processed texts; these get their own
# names so text_A / text_B remain plain strings.
doc_A, doc_B = nlp(text_A), nlp(text_B)
wpd = round(pm.wpd(doc_A, doc_B), 2)
ld = round(pm.ld(doc_A, doc_B), 2)

metriccol1, metriccol2, metriccol3, metriccol4 = st.columns(4)
metriccol1.metric("WPD", wpd)
metriccol2.metric("LD", ld)
metriccol3.metric("Edit Dist.", dist)
metriccol4.metric("BLEU", bleu)

with st.expander("More info"):
    st.markdown("""**Explanation of Metrics**

* **WPD**: Word Position Deviation measures structural changes between two paraphrases
* **LD**: Lexical Deviation measures degree of vocabulary changes between two paraphrases
* **Edit Dist.**: Levenshtein edit distance
* **BLEU**: SELF-BLEU score

For more information, see https://github.com/tlkh/paraphrase-metrics
""")
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit
2
+ spacy
3
+ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz#egg=en_core_web_sm
4
+ paraphrase_metrics