Initial commit
- app.py +117 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,117 @@
import streamlit as st
import spacy
from paraphrase_metrics import metrics as pm
import time
import difflib

st.set_page_config(page_title="TextDiff Visualizer")

def render_single_para(paragraph, segment_info, prefix="a", other="b", gap=" "):
    # span template for differing text: LightCoral, switching to cyan on hover
    span_diff_1 = """<span style="background-color:LightCoral;color:black;border-radius:2px;" onmouseover="chbg_"""
    span_diff_2 = """('cyan')" onmouseout="chbg_"""
    span_diff_3 = """('LightCoral')" id='"""
    # span template for matching text: LightGreen, switching to cyan on hover
    span_same_1 = """<span style="background-color:LightGreen;color:black;border-radius:2px;" onmouseover="chbg_"""
    span_same_2 = """('cyan')" onmouseout="chbg_"""
    span_same_3 = """('LightGreen')" id='"""
    segments = ["<p>",]
    for i, m in enumerate(segment_info):
        span1_id = prefix+"_"+str(i)+"_1"
        span1_id_other = other+"_"+str(i)+"_1"
        if i > 0:
            m_prev = segment_info[i-1]
            segment1 = span_diff_1 + span1_id + span_diff_2 + span1_id + span_diff_3 + span1_id + "'>" + paragraph[m_prev[0]+m_prev[1]:m[0]] + "</span>"
        else:
            segment1 = span_diff_1 + span1_id + span_diff_2 + span1_id + span_diff_3 + span1_id + "'>" + paragraph[:m[0]] + "</span>"
        span2_id = prefix+"_"+str(i)+"_2"
        span2_id_other = other+"_"+str(i)+"_2"
        segment2 = span_same_1 + span2_id + span_same_2 + span2_id + span_same_3 + span2_id + "'>" + paragraph[m[0]:m[0]+m[1]] + "</span>"
        highlighting_code = """<script>
        function chbg_"""+span1_id+"""(colour){
            document.getElementById('"""+span1_id+"""').style.backgroundColor=colour;
            document.getElementById('"""+span1_id_other+"""').style.backgroundColor=colour;
        }
        function chbg_"""+span2_id+"""(colour){
            document.getElementById('"""+span2_id+"""').style.backgroundColor=colour;
            document.getElementById('"""+span2_id_other+"""').style.backgroundColor=colour;
        }
        </script>"""
        segments += [highlighting_code, segment1, segment2]
    segments.append("</p>")
    return gap.join(segments)

def render_diff(a_paragraph, b_paragraph, gap=" ", prefix=None):
    if prefix is None:
        prefix = str(int(time.time()))
    s = difflib.SequenceMatcher(None, a_paragraph.lower(), b_paragraph.lower(), autojunk=False)
    matching_blocks = s.get_matching_blocks()
    # column a: (start, size) of each matching block in a_paragraph
    a_segment_info = [[b.a,b.size] for b in matching_blocks]
    a_html_paragraph = render_single_para(a_paragraph, a_segment_info, gap=gap, prefix=prefix+"_a", other=prefix+"_b")
    # column b: (start, size) of each matching block in b_paragraph
    b_segment_info = [[b.b,b.size] for b in matching_blocks]
    b_html_paragraph = render_single_para(b_paragraph, b_segment_info, gap=gap, prefix=prefix+"_b", other=prefix+"_a")
    # two-column table holding both rendered paragraphs side by side
    table = """<table style="width:100%;font-family:sans-serif;font-size:large;"><tr style="background-color:white;padding:1px;">
    <td style="border: 1px solid silver;padding:0.4em;border-radius:4px;">"""+a_html_paragraph+"""</td>
    <td style="border: 1px solid silver;padding:0.4em;border-radius:4px;">"""+b_html_paragraph+"""</td>
    </tr></table>"""
    return table

@st.cache(allow_output_mutation=True)
def load_model():
    nlp = spacy.load("en_core_web_sm")
    return nlp

nlp = load_model()

st.markdown("### TextDiff Visualizer")

mode = st.selectbox("Input", ["Custom", "Examples"])

if mode == "Custom":
    col1, col2 = st.columns(2)
    with col1:
        text_A = st.text_area("Text 1", value="The findings are being published July 1st in the Annals of Internal Medicine.")
    with col2:
        text_B = st.text_area("Text 2", value="The findings are published in the July 1st issue of the Annals of Internal Medicine.")
else:
    examples = st.radio("Examples", [
        "The top rate will go to 4.45 percent for all residents with taxable incomes above $500,000. ; For residents with incomes above $500,000, the income-tax rate will increase to 4.45 percent.",
        "However, prosecutors have declined to take criminal action against guards, though Fine said his inquiry is not finished. ; Prosecutors have declined to take criminal action against corrections officers, although Fine said his inquiry was not finished.",
        "In trading on the New York Stock Exchange, Kraft shares fell 25 cents to close at $32.30. ; Kraft's shares fell 25 cents to close at $32.30 yesterday on the New York Stock Exchange.",
        "An attempt last month in the Senate to keep the fund open for another year fell flat. ; An attempt to keep the fund open for another year fell flat in the Senate last month.",
        "Prisoners were tortured and executed -- their ears and scalps severed for souvenirs. ; They frequently tortured and shot prisoners, severing ears and scalps for souvenirs.",
        "American has laid off 6,500 of its flight attendants since Dec. 31. ; Since October 2001, American has laid off 6,149 flight attendants.",
    ])
    text_A, text_B = examples.split(" ; ")

st.markdown("Visualization")

html_viz = render_diff(text_A, text_B)

st.components.v1.html(html_viz)

dist = round(pm.edit_distance(text_A, text_B), 2)
bleu = round(pm.self_bleu(text_A, text_B), 2)
text_A, text_B = nlp(text_A), nlp(text_B)
wpd = round(pm.wpd(text_A, text_B), 2)
ld = round(pm.ld(text_A, text_B), 2)

metriccol1, metriccol2, metriccol3, metriccol4 = st.columns(4)
metriccol1.metric("WPD", wpd)
metriccol2.metric("LD", ld)
metriccol3.metric("Edit Dist.", dist)
metriccol4.metric("BLEU", bleu)

with st.expander("More info"):
    st.markdown("""**Explanation of Metrics**

* **WPD**: Word Position Deviation measures structural changes between two paraphrases
* **LD**: Lexical Deviation measures the degree of vocabulary change between two paraphrases
* **Edit Dist.**: Levenshtein edit distance
* **BLEU**: self-BLEU score

For more information, see https://github.com/tlkh/paraphrase-metrics
""")
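The two-column rendering above is driven by difflib.SequenceMatcher.get_matching_blocks(): each (a, b, size) triple marks a span shared (case-insensitively) by both texts, and the gaps between consecutive triples become the LightCoral "diff" spans. A minimal sketch of that step outside Streamlit, using the app's default "Custom" texts:

import difflib

a = "The findings are being published July 1st in the Annals of Internal Medicine."
b = "The findings are published in the July 1st issue of the Annals of Internal Medicine."

# same call render_diff makes: case-insensitive, character-level, no junk heuristic
s = difflib.SequenceMatcher(None, a.lower(), b.lower(), autojunk=False)

prev_end = 0
for m in s.get_matching_blocks():      # the final block is always (len(a), len(b), 0)
    diff_part = a[prev_end:m.a]        # text not matched in b -> LightCoral span
    same_part = a[m.a:m.a + m.size]    # text shared with b -> LightGreen span
    prev_end = m.a + m.size
    print(repr(diff_part), "|", repr(same_part))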
requirements.txt
ADDED
@@ -0,0 +1,4 @@
streamlit
spacy
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz#egg=en_core_web_sm
paraphrase_metrics
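The four numbers in the metric row can be reproduced outside Streamlit with the same paraphrase_metrics calls app.py makes; a minimal sketch, assuming en_core_web_sm is installed as pinned above and using one of the app's bundled example pairs:

import spacy
from paraphrase_metrics import metrics as pm

nlp = spacy.load("en_core_web_sm")

text_A = "An attempt last month in the Senate to keep the fund open for another year fell flat."
text_B = "An attempt to keep the fund open for another year fell flat in the Senate last month."

# string-level metrics, computed on the raw text
dist = round(pm.edit_distance(text_A, text_B), 2)  # Levenshtein edit distance
bleu = round(pm.self_bleu(text_A, text_B), 2)      # self-BLEU

# WPD and LD operate on spaCy Doc objects
doc_A, doc_B = nlp(text_A), nlp(text_B)
wpd = round(pm.wpd(doc_A, doc_B), 2)               # Word Position Deviation
ld = round(pm.ld(doc_A, doc_B), 2)                 # Lexical Deviation

print("WPD:", wpd, "LD:", ld, "Edit Dist.:", dist, "BLEU:", bleu)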