Spaces:
Runtime error
Runtime error
Upload app.py
Browse files
app.py
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# %load apps/slider.py
|
2 |
+
from cgitb import html
|
3 |
+
import time
|
4 |
+
import streamlit as st
|
5 |
+
from annotated_text import annotated_text
|
6 |
+
import json
|
7 |
+
import random
|
8 |
+
import corrector
|
9 |
+
import classifier
|
10 |
+
from difflib import SequenceMatcher
|
11 |
+
|
12 |
+
def underline_diff(a, b):
    """Return an HTML snippet marking how sentence ``b`` differs from ``a``.

    Both strings are split on whitespace and compared word by word with
    ``difflib.SequenceMatcher``. Unchanged runs are emitted as plain text,
    words only present in ``b`` are wrapped in ``<ins>``, and words only
    present in ``a`` are wrapped in ``<del>``. For a replacement only the
    new words from ``b`` are shown (inside ``<ins>``).

    Every word is HTML-escaped before being placed in the snippet, so the
    only markup in the result is the ``<ins>``/``<del>`` tags added here.

    Args:
        a: The original sentence.
        b: The corrected sentence.

    Returns:
        The rendered pieces joined with single spaces; an empty string when
        both inputs contain no words.
    """
    # Imported locally on purpose: at module level the name ``html`` is bound
    # by ``from cgitb import html`` and is therefore not the ``html`` module.
    from html import escape

    def render(words):
        # quote=False: the text lands in element content, never in an
        # attribute, so apostrophes/quotes can stay readable.
        return escape(" ".join(words), quote=False)

    # source: https://stackoverflow.com/questions/774316/python-difflib-highlighting-differences-inline
    seqm = SequenceMatcher(None, a.split(), b.split())
    output = []
    for opcode, a0, a1, b0, b1 in seqm.get_opcodes():
        if opcode == "equal":
            output.append(render(seqm.a[a0:a1]))
        elif opcode == "insert":
            output.append("<ins>" + render(seqm.b[b0:b1]) + "</ins>")
        elif opcode == "delete":
            output.append("<del>" + render(seqm.a[a0:a1]) + "</del>")
        elif opcode == "replace":
            # Only the replacement is rendered; the replaced words from ``a``
            # are left out (the original code had that part commented out).
            output.append("<ins>" + render(seqm.b[b0:b1]) + "</ins>")
    return " ".join(output)
|
26 |
+
|
27 |
+
# Collect the "errorful" field of every entry in dev.json.
# The file is opened in a context manager so the handle is closed right after
# parsing, and decoded explicitly as UTF-8 so accented characters do not
# depend on the platform's default encoding.
with open("dev.json", encoding="utf-8") as dev_file:
    example_sentences = [sent["errorful"] for sent in json.load(dev_file)]
|
28 |
+
|
29 |
+
# Page heading.
st.title("Virtual tutor - Proof of concept")

# Sidebar: logo first, then a one-sentence introduction to the demo.
with st.sidebar:
    st.image("seedlang_logo.png", width=60)
    st.write(
        "You can test here the Grammatical Error Correction and "
        "Classification models I developed as part of my internship "
        "at Seedlang."
    )
|
34 |
+
|
35 |
+
# NOTE(review): indentation was lost in this copy of the file, so it is not
# visible whether this legend (and the caption below it) is meant to render
# in the sidebar or on the main page -- confirm before re-indenting.

## include 5 error types
st.subheader("Error types")
st.write("This PoC corrects and detects 5 type of French beginner mistakes:")

# HTML description for each error-type code; indexed below with the keys of
# classifier.COLOURS, so every key of that mapping needs an entry here.
error_descriptions = {
    "ADJ": "adjective has wrong gender agreement (<i>*la grand table</i>)",
    "ART": "article has wrong gender agreement (<i>*le table</i>)",
    "ELI": "elision is missing (<i>*je adore</i>)",
    "FIN": "non-finite form of the verb instead of a finite form (<i>*je adorer</i>)",
    "NEG": "problem with the position of the negation (<i>*je ne pas adore</i>)",
}

# One legend line per error type: a pill in the type's colour showing the
# code, followed by " - " and the description.
for error_type, colour in classifier.COLOURS.items():
    description = error_descriptions[error_type]
    label = (
        f'<span style="background: {colour}; border-radius: 0.33rem; padding: 0.125rem 0.5rem; overflow: hidden;">'
        '<span style="padding-left: 0.5rem; text-transform: uppercase;">'
        f'<span style="font-size: 0.67em; opacity: 0.8;">{error_type}</span>'
        '</span></span>'
        f' - {description}'
    )
    st.markdown(label, unsafe_allow_html=True)

st.caption(
    "The example sentences are taken from the development dataset. "
    "The errors were artificially generated, and many of the original "
    "sentences were too."
)
|
52 |
+
|
53 |
+
def _store_random_example():
    """Draw one example sentence at random and keep it in the session state."""
    st.session_state["value"] = random.choice(example_sentences)


# Make sure the session always holds a sentence to pre-fill the input with.
if "value" not in st.session_state:
    _store_random_example()

# Let the user swap the stored sentence for another random example.
random_ex = st.button("Get another random example")
if random_ex:
    _store_random_example()
|
61 |
+
|
62 |
+
|
63 |
+
# Text box pre-filled with the sentence currently stored in the session.
sentence = st.text_input(
    "Write a French sentence to correct and label",
    value=st.session_state["value"],
)
submit = st.button("Correct")

# NOTE(review): indentation was lost in this copy of the file. The nesting
# below (diff rendered inside the spinner, closing caption inside the
# ``if submit`` branch) is the most plausible reading -- confirm against the
# real app.py.
if submit:
    col1, col2 = st.columns(2)

    # Left column: the input sentence with its classified error spans.
    with col1:
        st.subheader("Labelled errors")
        with st.spinner("Labelling errors"):
            annotated_spans = classifier.annotate(sentence)
            annotated_text(*annotated_spans)

    # Right column: the corrected sentence, with changes marked up as HTML.
    with col2:
        st.subheader("Corrected sentence")
        with st.spinner("Correcting the sentence"):
            corrected_sent = corrector.correct(sentence)
            diff_markup = underline_diff(a=sentence, b=corrected_sent)
            st.markdown(diff_markup, unsafe_allow_html=True)

    st.caption(
        "Those two models are completely independent, but I eventually "
        "want to only correct the character spans where an error was "
        "detected and classified."
    )
|