alice-hml committed on
Commit
3ee4a68
1 Parent(s): 3236f2a

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -0
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %load apps/slider.py
2
+ from cgitb import html
3
+ import time
4
+ import streamlit as st
5
+ from annotated_text import annotated_text
6
+ import json
7
+ import random
8
+ import corrector
9
+ import classifier
10
+ from difflib import SequenceMatcher
11
+
12
def underline_diff(a, b):
    """Return an HTML fragment showing how ``b`` differs from ``a``, word by word.

    Both strings are split on whitespace and aligned with
    ``difflib.SequenceMatcher``.  The result is built as follows:

    * words common to both strings are emitted unchanged;
    * words only present in ``b`` are wrapped in ``<ins>``;
    * words only present in ``a`` are wrapped in ``<del>``;
    * replaced words show only the new text from ``b`` in ``<ins>``
      (the replaced words from ``a`` are deliberately not shown).

    All text taken from ``a`` and ``b`` is HTML-escaped, so the only markup
    in the returned string is the ``<ins>``/``<del>`` tags added here.

    Args:
        a: the original sentence.
        b: the corrected sentence.

    Returns:
        The diff fragments joined by single spaces; ``""`` when both inputs
        contain no words.
    """
    # source: https://stackoverflow.com/questions/774316/python-difflib-highlighting-differences-inline
    # Imported locally under an alias: at module level the name ``html`` is
    # bound by ``from cgitb import html``, not to the stdlib ``html`` module.
    from html import escape as _escape

    def _text(words):
        # quote=False: the text only ever lands in element content, never in
        # an attribute, so apostrophes and quotes can stay readable.
        return _escape(" ".join(words), quote=False)

    seqm = SequenceMatcher(None, a.split(), b.split())
    output = []
    for opcode, a0, a1, b0, b1 in seqm.get_opcodes():
        if opcode == "equal":
            output.append(_text(seqm.a[a0:a1]))
        elif opcode in ("insert", "replace"):
            # For "replace" only the new words are displayed.
            output.append("<ins>" + _text(seqm.b[b0:b1]) + "</ins>")
        elif opcode == "delete":
            output.append("<del>" + _text(seqm.a[a0:a1]) + "</del>")
    return " ".join(output)
26
+
27
# Erroneous example sentences read from the "errorful" field of every record
# in dev.json.  The file is opened in a ``with`` block so the handle is closed
# as soon as parsing finishes, and decoded explicitly as UTF-8 so accented
# characters do not depend on the platform's default encoding.
with open("dev.json", encoding="utf-8") as dev_file:
    example_sentences = [sent["errorful"] for sent in json.load(dev_file)]
28
+
29
+ st.title("Virtual tutor - Proof of concept")
30
+
31
+ with st.sidebar:
32
+ st.image("seedlang_logo.png", width=60)
33
+ st.write("You can test here the Grammatical Error Correction and Classification models I developed as part of my internship at Seedlang.")
34
+
35
+ ## include 5 error types
36
+ st.subheader("Error types")
37
+ st.write("This PoC corrects and detects 5 type of French beginner mistakes:")
38
+
39
# Short description (with an HTML-formatted example) for each error label.
error_descriptions = {
    "ADJ": "adjective has wrong gender agreement (<i>*la grand table</i>)",
    "ART": "article has wrong gender agreement (<i>*le table</i>)",
    "ELI": "elision is missing (<i>*je adore</i>)",
    "FIN": "non-finite form of the verb instead of a finite form (<i>*je adorer</i>)",
    "NEG": "problem with the position of the negation (<i>*je ne pas adore</i>)",
}

# One legend line per entry of classifier.COLOURS: a badge filled with the
# entry's colour and showing the error label, followed by its description.
for error_type, colour in classifier.COLOURS.items():
    badge = (
        f'<span style="background: {colour}; border-radius: 0.33rem; padding: 0.125rem 0.5rem; overflow: hidden;">'
        '<span style="padding-left: 0.5rem; text-transform: uppercase;">'
        f'<span style="font-size: 0.67em; opacity: 0.8;">{error_type}</span>'
        "</span></span>"
    )
    st.markdown(
        f"{badge} - {error_descriptions[error_type]}",
        unsafe_allow_html=True,
    )
50
+
51
+ st.caption("The example sentences are taken from the development dataset. The errors were artificially generated, and many of the original sentences were too.")
52
+
53
# First run of the session: pick a random example sentence and remember it.
if "value" not in st.session_state:
    st.session_state["value"] = random.choice(example_sentences)

# Clicking the button swaps the remembered sentence for a new random one.
if st.button("Get another random example"):
    st.session_state["value"] = random.choice(example_sentences)
61
+
62
+
63
# Text box pre-filled with the sentence currently held in session state.
sentence = st.text_input(
    "Write a French sentence to correct and label",
    value=st.session_state["value"],
)

# NOTE(review): indentation is not visible in this view; the placement of
# st.markdown after (rather than inside) the second spinner is assumed.
# Confirm against the original file.
if st.button("Correct"):
    labelled_col, corrected_col = st.columns(2)

    # Left column: the sentence annotated by the classifier.
    with labelled_col:
        st.subheader("Labelled errors")
        with st.spinner("Labelling errors"):
            annotated_text(*classifier.annotate(sentence))

    # Right column: the corrector's output, rendered as a diff against the input.
    with corrected_col:
        st.subheader("Corrected sentence")
        with st.spinner("Correcting the sentence"):
            corrected_sent = corrector.correct(sentence)
        st.markdown(
            underline_diff(a=sentence, b=corrected_sent),
            unsafe_allow_html=True,
        )
77
+ st.caption("Those two models are completely independent, but I eventually want to only correct the character spans where an error was detected and classified.")