# %load apps/slider.py
from cgitb import html
import time
import streamlit as st
from annotated_text import annotated_text
import json
import random
import corrector
import classifier
from difflib import SequenceMatcher
def underline_diff(a, b):
    """Return ``b`` as an HTML snippet with its word-level changes from ``a`` marked.

    Both sentences are split on whitespace and aligned with
    ``difflib.SequenceMatcher``. The result is built segment by segment:

    * words common to both sentences are emitted unchanged;
    * words that are new in ``b`` (inserted, or substituted for words of
      ``a``) are wrapped in ``<u>...</u>``;
    * words that only exist in ``a`` are kept, wrapped in ``<del>...</del>``.

    Every word is HTML-escaped, because the caller renders the result with
    ``unsafe_allow_html=True`` and the text comes straight from user input.

    Args:
        a: The original sentence.
        b: The corrected sentence.

    Returns:
        A single string with the segments joined by single spaces; an empty
        string when both inputs contain no words.
    """
    # Approach adapted from:
    # https://stackoverflow.com/questions/774316/python-difflib-highlighting-differences-inline

    # Imported locally: at module level the name ``html`` is bound to
    # ``cgitb.html`` by the file's imports, not to the stdlib ``html`` module.
    from html import escape

    matcher = SequenceMatcher(None, a.split(), b.split())

    def render(words, tag=None):
        # quote=False keeps apostrophes readable; the text is only ever
        # placed in element content, never inside an attribute value.
        text = escape(" ".join(words), quote=False)
        return f"<{tag}>{text}</{tag}>" if tag else text

    pieces = []
    for opcode, a0, a1, b0, b1 in matcher.get_opcodes():
        if opcode == "equal":
            pieces.append(render(matcher.a[a0:a1]))
        elif opcode == "delete":
            pieces.append(render(matcher.a[a0:a1], "del"))
        elif opcode in ("insert", "replace"):
            # For a replacement only the new words are shown, as before.
            pieces.append(render(matcher.b[b0:b1], "u"))
    return " ".join(pieces)
# Example sentences offered to the user: the "errorful" field of every entry
# in dev.json. The file is opened in a ``with`` block so the handle is closed
# as soon as it has been parsed, and decoded explicitly as UTF-8 so accented
# characters do not depend on the platform's default encoding.
with open("dev.json", encoding="utf-8") as dev_file:
    example_sentences = [sent["errorful"] for sent in json.load(dev_file)]
# Streamlit page: a title, a sidebar describing the demo, a text input seeded
# with a random example sentence, and a "Correct" button that runs the
# classifier and the corrector on the entered sentence.
# NOTE(review): this listing carries no leading indentation, so the extent of
# each `with` / `if` / `for` body below cannot be read from it — confirm the
# nesting against the original file before editing.
st.title("Virtual tutor - Proof of concept")
# Sidebar content: logo, short description and the list of error types.
with st.sidebar:
st.image("seedlang_logo.png", width=60)
st.write("You can test here the Grammatical Error Correction and Classification models I developed as part of my internship at Seedlang.")
## include 5 error types
st.subheader("Error types")
st.write("This PoC corrects and detects 5 type of French beginner mistakes:")
# Human-readable description for each error-type code; looked up by key in
# the loop below.
error_descriptions = {
"ADJ": """adjective has wrong gender agreement (*la grand table)""",
"ART": """article has wrong gender agreement (*le table)""",
"ELI": """elision is missing (*je adore)""",
"FIN": """non-finite form of the verb instead of a finite form (*je adorer)""",
"NEG": """problem with the position of the negation (*je ne pas adore)"""
}
# One markdown line per key of classifier.COLOURS. Each key is used to index
# error_descriptions, so a key without a description raises KeyError.
# NOTE(review): `colour` is unpacked but never used, although the label is
# rendered with unsafe_allow_html=True — presumably the label was meant to
# carry colour markup; confirm.
for error_type, colour in classifier.COLOURS.items():
label = f"""{error_type} - {error_descriptions[error_type]}"""
st.markdown(label, unsafe_allow_html=True)
st.caption("The example sentences are taken from the development dataset. The errors were artificially generated, and many of the original sentences were too.")
# Choose an example sentence only when none is stored in the session state
# yet.
if "value" not in st.session_state:
st.session_state["value"] = random.choice(example_sentences)
# The button replaces the stored example with a newly drawn one.
random_ex = st.button("Get another random example")
if random_ex:
st.session_state["value"] = random.choice(example_sentences)
# Text box pre-filled with the stored example; the user may edit it freely.
sentence = st.text_input("Write a French sentence to correct and label", value=st.session_state["value"])
submit = st.button("Correct")
# On submit, show the two model outputs in two columns.
if submit:
col1, col2 = st.columns(2)
with col1:
st.subheader("Labelled errors")
with st.spinner("Labelling errors"):
# classifier.annotate's result is unpacked as positional arguments for
# annotated_text.
annotated_text(*classifier.annotate(sentence))
with col2:
st.subheader("Corrected sentence")
with st.spinner("Correcting the sentence"):
corrected_sent = corrector.correct(sentence)
# The corrected sentence is passed through underline_diff together with the
# input sentence and rendered as markdown with raw HTML allowed.
st.markdown(underline_diff(a=sentence, b=corrected_sent), unsafe_allow_html=True)
st.caption("Those two models are completely independent, but I eventually want to only correct the character spans where an error was detected and classified.")