# %load apps/slider.py
"""Streamlit proof of concept for a French "virtual tutor".

The page lets the user type (or randomly draw) a French sentence, then shows
two independent results side by side: the sentence annotated by
``classifier.annotate`` and the sentence rewritten by ``corrector.correct``,
with the words that changed marked up.
"""
# NOTE(review): `cgitb` was removed in Python 3.13 (PEP 594) and neither
# `html` nor `time` is used in this script -- drop these two imports if
# nothing else relies on them.
from cgitb import html
import time
import json
import random
from difflib import SequenceMatcher
from html import escape

import streamlit as st
from annotated_text import annotated_text

import corrector
import classifier


def underline_diff(a, b):
    """Return ``b`` as an HTML fragment with its word-level changes marked.

    Both sentences are split on whitespace and aligned with
    ``difflib.SequenceMatcher``. Words that are new in ``b`` (inserted, or
    substituted for words of ``a``) are wrapped in ``<u>``; words that exist
    only in ``a`` are kept but wrapped in ``<del>``; unchanged words are
    emitted as they are.

    Every word is HTML-escaped, because the result is meant to be rendered
    with ``unsafe_allow_html=True`` and ``a`` comes straight from user input.

    Args:
        a: The original sentence.
        b: The corrected sentence.

    Returns:
        A single string in which the marked-up chunks are joined by spaces.
        Two empty sentences give an empty string.
    """
    # source: https://stackoverflow.com/questions/774316/python-difflib-highlighting-differences-inline
    seqm = SequenceMatcher(None, a.split(), b.split())
    output = []
    for opcode, a0, a1, b0, b1 in seqm.get_opcodes():
        # quote=False keeps French apostrophes readable in the raw output;
        # only &, < and > need neutralising inside element content.
        old_words = escape(" ".join(seqm.a[a0:a1]), quote=False)
        new_words = escape(" ".join(seqm.b[b0:b1]), quote=False)
        if opcode == "equal":
            output.append(old_words)
        elif opcode in ("insert", "replace"):
            # For a replacement only the new words are shown; the words
            # they replace are deliberately left out.
            output.append(f"<u>{new_words}</u>")
        elif opcode == "delete":
            output.append(f"<del>{old_words}</del>")
    return " ".join(output)


def _load_example_sentences(path="dev.json"):
    """Return the ``"errorful"`` field of every record in the JSON file."""
    # Explicit UTF-8: the sentences are French, and the platform default
    # encoding is not UTF-8 everywhere.
    with open(path, encoding="utf-8") as handle:
        return [sent["errorful"] for sent in json.load(handle)]


example_sentences = _load_example_sentences()

st.title("Virtual tutor - Proof of concept")

with st.sidebar:
    st.image("seedlang_logo.png", width=60)
    st.write("You can test here the Grammatical Error Correction and Classification models I developed as part of my internship at Seedlang.")

    ## include 5 error types
    st.subheader("Error types")
    st.write("This PoC corrects and detects 5 type of French beginner mistakes:")
    error_descriptions = {
        "ADJ": """adjective has wrong gender agreement (*la grand table)""",
        "ART": """article has wrong gender agreement (*le table)""",
        "ELI": """elision is missing (*je adore)""",
        "FIN": """non-finite form of the verb instead of a finite form (*je adorer)""",
        "NEG": """problem with the position of the negation (*je ne pas adore)""",
    }
    # One legend entry per error type known to the classifier.
    # NOTE(review): assumes the values of classifier.COLOURS are CSS colour
    # strings usable as a background colour -- confirm.
    for error_type, colour in classifier.COLOURS.items():
        label = (
            f"""<span style="background-color: {colour}">{error_type}</span>"""
            f""" - {error_descriptions[error_type]}"""
        )
        st.markdown(label, unsafe_allow_html=True)

    st.caption(
        "The example sentences are taken from the development dataset. "
        "The errors were artificially generated, and many of the original sentences were too."
    )

# Streamlit re-executes the script on every interaction, so the sentence
# pre-filled in the text box is kept in the session state.
if "value" not in st.session_state:
    st.session_state["value"] = random.choice(example_sentences)

random_ex = st.button("Get another random example")
if random_ex:
    st.session_state["value"] = random.choice(example_sentences)

sentence = st.text_input(
    "Write a French sentence to correct and label",
    value=st.session_state["value"],
)
submit = st.button("Correct")

if submit:
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Labelled errors")
        with st.spinner("Labelling errors"):
            # classifier.annotate's result is unpacked as the positional
            # arguments of annotated_text.
            annotated_text(*classifier.annotate(sentence))

    with col2:
        st.subheader("Corrected sentence")
        with st.spinner("Correcting the sentence"):
            corrected_sent = corrector.correct(sentence)
        st.markdown(underline_diff(a=sentence, b=corrected_sent), unsafe_allow_html=True)

    # NOTE(review): nesting reconstructed from a whitespace-collapsed source
    # -- confirm this caption is meant to appear only with the results.
    st.caption("Those two models are completely independent, but I eventually want to only correct the character spans where an error was detected and classified.")