"""Streamlit app that shows two texts side by side with their differences.

The two texts are aligned character-wise with ``difflib.SequenceMatcher``.
Shared and differing stretches are wrapped in ``<span>`` elements and the
result is rendered as an HTML component, followed by four paraphrase metrics
(WPD, LD, edit distance, self-BLEU) computed by ``paraphrase_metrics``.

NOTE(review): the copy of this file that was reviewed had lost its line
structure and all HTML/JS inside its string literals.  The Python logic below
follows the surviving code; the concrete markup, colours and hover behaviour
are a reconstruction and should be compared against the original in version
control.
"""
import difflib
import html
import time

import spacy
import streamlit as st
from paraphrase_metrics import metrics as pm

st.set_page_config(page_title="TextDiff Visualizer")

# NOTE(review): colours are reconstructed.  The only surviving hint was the
# comment "span (diff text) change from red to cyan".
_DIFF_COLOR = "#ffb3b3"
_DIFF_HOVER_COLOR = "cyan"
_SAME_COLOR = "transparent"
_SAME_HOVER_COLOR = "#fff2a8"

# Separator between the two sentences of a built-in example.
_EXAMPLE_SEPARATOR = " ; "

_METRICS_HELP = """**Explanation of Metrics**

* **WPD**: Word Position Deviation measures structural changes between two paraphrases
* **LD**: Lexical Deviation measures degree of vocabulary changes between two paraphrases
* **Edit Dist.**: Levenshtein edit distance
* **BLEU**: SELF-BLEU score

For more information, see https://github.com/tlkh/paraphrase-metrics
"""


def _set_background_js(element_ids, color):
    """Return a JS statement that sets the background of every given id.

    Ids that are not present in the document are skipped, so a span may
    reference a counterpart that was never rendered.
    """
    id_list = ",".join(f"'{element_id}'" for element_id in element_ids)
    return (
        f"[{id_list}].forEach(function(i){{"
        "var e=document.getElementById(i);"
        f"if(e){{e.style.backgroundColor='{color}';}}"
        "});"
    )


def _hover_span(span_id, text, base_color, hover_color, linked_ids=()):
    """Wrap *text* in a span that changes colour while the mouse is over it.

    Args:
        span_id: DOM id given to the span.
        text: Raw (unescaped) text content; it is HTML-escaped here.
        base_color: Background colour when not hovered.
        hover_color: Background colour while hovered.
        linked_ids: Ids of other spans that change colour together with
            this one.

    Returns:
        The HTML for the span as a string.
    """
    targets = (span_id, *linked_ids)
    on_enter = _set_background_js(targets, hover_color)
    on_leave = _set_background_js(targets, base_color)
    return (
        f'<span onmouseover="{on_enter}" onmouseout="{on_leave}" '
        f"style='background-color: {base_color};' id='{span_id}'>"
        f"{html.escape(text)}</span>"
    )


def render_single_para(paragraph, segment_info, prefix="a", other="b", gap=" "):
    """Render one paragraph as HTML with diff and shared segments marked.

    Args:
        paragraph: The text to render.
        segment_info: Sequence of ``[start, size]`` pairs describing the
            stretches of *paragraph* shared with the other text, in order.
        prefix: Id prefix for spans of this paragraph.  It is inserted
            verbatim into HTML ids and JS string literals, so it must not
            contain quotes.
        other: Id prefix used by the paragraph being compared against.
        gap: String placed between consecutive rendered segments.

    Returns:
        An HTML ``<p>`` element as a string.
    """
    segments = ["<p>"]
    previous_end = 0  # end offset of the previous shared segment
    for i, (start, size) in enumerate(segment_info):
        span1_id = prefix + "_" + str(i) + "_1"
        span2_id = prefix + "_" + str(i) + "_2"
        span2_id_other = other + "_" + str(i) + "_2"

        # Text between the previous shared segment and this one exists only
        # in this paragraph.
        segment1 = _hover_span(
            span1_id,
            paragraph[previous_end:start],
            _DIFF_COLOR,
            _DIFF_HOVER_COLOR,
        )
        # Shared text: hovering also lights up the counterpart span.
        # NOTE(review): the original emitted a separate ``highlighting_code``
        # fragment per segment whose content was lost; linking the two spans
        # through inline handlers is a reconstruction of its likely purpose.
        segment2 = _hover_span(
            span2_id,
            paragraph[start:start + size],
            _SAME_COLOR,
            _SAME_HOVER_COLOR,
            linked_ids=(span2_id_other,),
        )
        segments += [segment1, segment2]
        previous_end = start + size
    segments.append("</p>")
    return gap.join(segments)


def render_diff(a_parapgraph, b_parapgraph, gap=" ", prefix=None):
    """Build a two-column HTML table comparing two texts.

    Args:
        a_parapgraph: First text (parameter name kept as-is for callers).
        b_parapgraph: Second text (parameter name kept as-is for callers).
        gap: Passed through to ``render_single_para``.
        prefix: Id prefix for all generated spans.  Defaults to the current
            Unix time in whole seconds.

    Returns:
        HTML for a table with the first text on the left and the second on
        the right.
    """
    if prefix is None:
        prefix = str(int(time.time()))

    # Lower-casing makes the alignment case-insensitive.  The resulting
    # offsets are applied to the original strings, which assumes lower()
    # does not change the string length.
    s = difflib.SequenceMatcher(
        None, a_parapgraph.lower(), b_parapgraph.lower(), autojunk=False
    )
    # The last block returned is a zero-size sentinel at the end of both
    # strings; it is what makes the trailing differing text get rendered.
    matching_blocks = s.get_matching_blocks()

    a_segment_info = [[b.a, b.size] for b in matching_blocks]
    a_html_paragraph = render_single_para(
        a_parapgraph, a_segment_info,
        gap=gap, prefix=prefix + "_a", other=prefix + "_b",
    )

    b_segment_info = [[b.b, b.size] for b in matching_blocks]
    b_html_paragraph = render_single_para(
        b_parapgraph, b_segment_info,
        gap=gap, prefix=prefix + "_b", other=prefix + "_a",
    )

    # NOTE(review): table markup is reconstructed.
    cell_open = "<td style='width: 50%; vertical-align: top;'>"
    table = (
        "<table style='width: 100%;'><tr>"
        + cell_open + a_html_paragraph + "</td>"
        + cell_open + b_html_paragraph + "</td>"
        + "</tr></table>"
    )
    return table


# ``st.cache`` is deprecated in newer Streamlit releases; use the resource
# cache when it exists and fall back to the original decorator otherwise.
if hasattr(st, "cache_resource"):
    _cache_model = st.cache_resource
else:
    _cache_model = st.cache(allow_output_mutation=True)


@_cache_model
def load_model():
    """Load the spaCy ``en_core_web_sm`` pipeline (cached across reruns)."""
    nlp = spacy.load("en_core_web_sm")
    return nlp


nlp = load_model()

st.markdown("### TextDiff Visualizer")

mode = st.selectbox("Input", ["Custom", "Examples"])

if mode == "Custom":
    col1, col2 = st.columns(2)
    with col1:
        text_A = st.text_area(
            "Text 1",
            value="The findings are being published July 1st in the Annals of Internal Medicine.",
        )
    with col2:
        text_B = st.text_area(
            "Text 2",
            value="The findings are published in the July 1st issue of the Annals of Internal Medicine.",
        )
else:
    examples = st.radio("Examples", [
        "The top rate will go to 4.45 percent for all residents with taxable incomes above $500,000. ; For residents with incomes above $500,000, the income-tax rate will increase to 4.45 percent.",
        "However, prosecutors have declined to take criminal action against guards, though Fine said his inquiry is not finished. ; Prosecutors have declined to take criminal action against corrections officers, although Fine said his inquiry was not finished.",
        "In trading on the New York Stock Exchange, Kraft shares fell 25 cents to close at $32.30. ; Kraft's shares fell 25 cents to close at $32.30 yesterday on the New York Stock Exchange.",
        "An attempt last month in the Senate to keep the fund open for another year fell flat. ; An attempt to keep the fund open for another year fell flat in the Senate last month.",
        "Prisoners were tortured and executed -- their ears and scalps severed for souvenirs. ; They frequently tortured and shot prisoners, severing ears and scalps for souvenirs.",
        "American has laid off 6,500 of its flight attendants since Dec. 31. ; Since October 2001, American has laid off 6,149 flight attendants.",
    ])
    # maxsplit=1 guarantees exactly two parts for the unpacking below.
    text_A, text_B = examples.split(_EXAMPLE_SEPARATOR, 1)

st.markdown("Visualization")

html_viz = render_diff(text_A, text_B)
st.components.v1.html(html_viz)

# String-level metrics work on the raw text.
dist = round(pm.edit_distance(text_A, text_B), 2)
bleu = round(pm.self_bleu(text_A, text_B), 2)

# WPD and LD are given the spaCy-processed documents.
doc_A, doc_B = nlp(text_A), nlp(text_B)
wpd = round(pm.wpd(doc_A, doc_B), 2)
ld = round(pm.ld(doc_A, doc_B), 2)

metriccol1, metriccol2, metriccol3, metriccol4 = st.columns(4)
metriccol1.metric("WPD", wpd)
metriccol2.metric("LD", ld)
metriccol3.metric("Edit Dist.", dist)
metriccol4.metric("BLEU", bleu)

with st.expander("More info"):
    st.markdown(_METRICS_HELP)