Spaces:

alice-hml
/

french_gec

Runtime error

App Files Files

alice-hml committed on May 26, 2022

Commit

3ee4a68

•

1 Parent(s): 3236f2a

Upload app.py

Browse files

Files changed (1) hide show

app.py +77 -0

app.py ADDED Viewed

	@@ -0,0 +1,77 @@

+# %load apps/slider.py
+from cgitb import html
+import time
+import streamlit as st
+from annotated_text import annotated_text
+import json
+import random
+import corrector
+import classifier
+from difflib import SequenceMatcher
def underline_diff(a, b):
    """Return an HTML snippet marking the word-level differences from *a* to *b*.

    Both strings are split on whitespace and compared with
    ``difflib.SequenceMatcher``. Words common to both are emitted as-is,
    words only in *b* (inserted, or replacing words of *a*) are wrapped in
    ``<ins>``, and words only in *a* are wrapped in ``<del>``. For a
    replacement only the new words are shown; the replaced words of *a* are
    deliberately left out.

    The words themselves are HTML-escaped (``&``, ``<``, ``>``) because the
    result is meant to be rendered as raw HTML; only the ``<ins>``/``<del>``
    tags produced here are real markup.

    Adapted from:
    https://stackoverflow.com/questions/774316/python-difflib-highlighting-differences-inline

    Args:
        a: The original sentence.
        b: The corrected sentence.

    Returns:
        The marked-up segments joined by single spaces (runs of whitespace in
        the inputs are therefore normalised to one space).
    """
    # Imported locally under an explicit name: at module level this file binds
    # the name ``html`` via ``from cgitb import html``, which is not the
    # stdlib ``html`` module.
    from html import escape

    def render(words, tag=None):
        # quote=False keeps apostrophes (very common in French) untouched.
        text = escape(' '.join(words), quote=False)
        return f"<{tag}>{text}</{tag}>" if tag else text

    seqm = SequenceMatcher(None, a.split(), b.split())
    output = []
    for opcode, a0, a1, b0, b1 in seqm.get_opcodes():
        if opcode == 'equal':
            output.append(render(seqm.a[a0:a1]))
        elif opcode == 'delete':
            output.append(render(seqm.a[a0:a1], 'del'))
        elif opcode in ('insert', 'replace'):
            # A replacement is displayed like an insertion of the new words.
            output.append(render(seqm.b[b0:b1], 'ins'))
    return ' '.join(output)
# Erroneous example sentences offered as random suggestions in the input box.
# Each record of dev.json is read for its "errorful" field. The file is opened
# in a context manager so the handle is closed, and decoded explicitly as
# UTF-8 so accented French characters do not depend on the host locale.
with open("dev.json", encoding="utf-8") as dev_file:
    example_sentences = [sent["errorful"] for sent in json.load(dev_file)]
# Page heading.
st.title("Virtual tutor - Proof of concept")

# One-line explanation (with a starred French example) for every error label
# listed in the sidebar, keyed by the label's short code.
error_descriptions = {
    "ADJ": "adjective has wrong gender agreement (<i>*la grand table</i>)",
    "ART": "article has wrong gender agreement (<i>*le table</i>)",
    "ELI": "elision is missing (<i>*je adore</i>)",
    "FIN": "non-finite form of the verb instead of a finite form (<i>*je adorer</i>)",
    "NEG": "problem with the position of the negation (<i>*je ne pas adore</i>)",
}

# Sidebar: logo, short introduction, and the legend of supported error types.
with st.sidebar:
    st.image("seedlang_logo.png", width=60)
    st.write("You can test here the Grammatical Error Correction and Classification models I developed as part of my internship at Seedlang.")

    st.subheader("Error types")
    st.write("This PoC corrects and detects 5 type of French beginner mistakes:")

    # One coloured badge per error type, using the colours defined by the
    # classifier module, followed by the matching description.
    for code, background in classifier.COLOURS.items():
        badge = (
            f'<span style="background: {background}; border-radius: 0.33rem; '
            'padding: 0.125rem 0.5rem; overflow: hidden;">'
            '<span style="padding-left: 0.5rem; text-transform: uppercase;">'
            f'<span style="font-size: 0.67em; opacity: 0.8;">{code}</span>'
            '</span></span>'
        )
        label = f"{badge} - {error_descriptions[code]}"
        st.markdown(label, unsafe_allow_html=True)

    st.caption("The example sentences are taken from the development dataset. The errors were artificially generated, and many of the original sentences were too.")
# Seed the input box with a random example on the first run of the session.
if "value" not in st.session_state:
    st.session_state["value"] = random.choice(example_sentences)

# Draw a new random example whenever the user asks for one.
if st.button("Get another random example"):
    st.session_state["value"] = random.choice(example_sentences)

sentence = st.text_input(
    "Write a French sentence to correct and label",
    value=st.session_state["value"],
)

# On "Correct", show the classifier output and the corrector output side by side.
if st.button("Correct"):
    labels_col, correction_col = st.columns(2)

    # Left column: the sentence annotated with the detected error types.
    with labels_col:
        st.subheader("Labelled errors")
        with st.spinner("Labelling errors"):
            annotated_text(*classifier.annotate(sentence))

    # Right column: the corrected sentence, with the changes marked up as HTML.
    with correction_col:
        st.subheader("Corrected sentence")
        with st.spinner("Correcting the sentence"):
            corrected_sent = corrector.correct(sentence)
            diff_html = underline_diff(a=sentence, b=corrected_sent)
            st.markdown(diff_html, unsafe_allow_html=True)

    st.caption("Those two models are completely independent, but I eventually want to only correct the character spans where an error was detected and classified.")