Maximax67 committed on
Commit
95b2f1e
1 Parent(s): dfbff4d

Add application file

Browse files
Files changed (2) hide show
  1. app.py +159 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import spacy
3
+ from spacy import displacy
4
+ from cefrpy import CEFRSpaCyAnalyzer, CEFRLevel
5
+
6
# Name of the spaCy pipeline passed to ``spacy.load``.
MODEL = "en_core_web_sm"

# Entity labels offered as choices in the "Entity types to skip CEFR" checkbox group.
ALL_ENTS = [
    'CARDINAL', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE',
    'LAW', 'LOC', 'MONEY', 'NORP', 'ORDINAL', 'ORG', 'PERCENT',
    'PERSON', 'PRODUCT', 'QUANTITY', 'TIME', 'WORK_OF_ART'
]

# Subset of ALL_ENTS that is pre-selected in the UI and used for the
# initial render.
DEFAULT_ENTITY_ITEMS_TO_SKIP = [
    'QUANTITY', 'MONEY', 'LANGUAGE', 'LAW',
    'WORK_OF_ART', 'PRODUCT', 'GPE',
    'ORG', 'FAC', 'PERSON'
]

# Sample text shown in the input box when the demo starts.
DEFAULT_TEXT = """The world's oldest known recipe is for beer. It dates back to around 5,000 BC and was found in ancient Sumeria (modern-day Iraq).

Did you know that the shortest war in history lasted only 38 minutes? It occurred between Britain and Zanzibar on August 27, 1896. Zanzibar surrendered after the British issued an ultimatum to end their rule.

Some of the longest words in the English language can leave you tongue-tied! One example is "pneumonoultramicroscopicsilicovolcanoconiosis," which refers to a lung disease caused by inhaling very fine silica dust. With 45 letters, it's often cited as one of the longest words in English dictionaries. However, it's more of a curiosity than a practical term used in everyday language!

In 2006, a Coca-Cola employee offered to sell Coca-Cola secrets to Pepsi. Pepsi responded by notifying Coca-Cola, and the FBI set up a sting operation to catch the culprit.

Sir Isaac Newton, the renowned mathematician and physicist, invented the cat flap. While studying at Cambridge University, Newton had a pet cat named Spithead. He became annoyed when Spithead would interrupt his experiments, so he designed a small door in his study door through which the cat could come and go freely."""

# Options handed to ``displacy.render``: one highlight colour per label.
# "SKIP" and "UNKNOWN" are the fallback labels emitted by get_dict_ents;
# the remaining keys are presumably the string forms of CEFRLevel values
# (TODO confirm against cefrpy).
DISPLACY_RENDER_OPTIONS = {
    "colors": {
        "A1": "#b0c4de",
        "A2": "#87ceeb",
        "B1": "#90ee90",
        "B2": "#adff2f",
        "C1": "#ffd700",
        "C2": "#ff9380",
        "SKIP": "#ffafed",
        "UNKNOWN": "#BCAAA4"
    }
}

# Contraction suffix -> full word, passed to CEFRSpaCyAnalyzer as
# ``abbreviation_mapping``.
# NOTE(review): "'s" and "'d" are ambiguous in English (possessive / "has",
# "would"); this table always picks a single expansion.
ABBREVIATION_MAPPING = {
    "'m": "am",
    "'s": "is",
    "'re": "are",
    "'ve": "have",
    "'d": "had",
    "n't": "not",
    "'ll": "will"
}

# Project links rendered under the page title.
LINKS_HTML = """
<p>
&ensp;Github: <a href="https://github.com/Maximax67/cefrpy">https://github.com/Maximax67/cefrpy</a><br>
&ensp;Docs: <a href="https://maximax67.github.io/cefrpy">https://maximax67.github.io/cefrpy</a><br>
</p>
"""

# Custom stylesheet passed to ``gr.Blocks(css=...)``.
CSS = """
h1 {
padding-top: 5px;
text-align: center;
display:block;
}

.hide-container, .gr-group {
background: white !important;
}
"""
71
+
72
# Load the spaCy pipeline once at import time so every render call reuses it.
nlp = spacy.load(MODEL)
73
+
74
def get_dict_ents(text: str, tokens: list[tuple[str, str, bool, float, int, int]]) -> dict:
    """Build the dict that displaCy's manual entity renderer consumes.

    Each token contributes at most one highlighted span:

    * a token with a truthy level is labelled with ``str(CEFRLevel(round(level)))``;
    * otherwise, a purely alphabetic token is labelled ``"SKIP"`` when its
      third field is truthy and ``"UNKNOWN"`` when it is not;
    * any other token (no level, not alphabetic) produces no span.

    Args:
        text: The text the spans refer to.
        tokens: 6-tuples as produced by the CEFR analyzer. Field meanings are
            inferred from how they are used here (word, <unused>, skipped
            flag, level, start offset, end offset) -- confirm against the
            cefrpy documentation.

    Returns:
        ``{"text": text, "ents": [...]}`` where every entry of ``ents`` has
        the keys ``"start"``, ``"end"`` and ``"label"``.
    """
    ents = []

    for word, _, skipped, level, start, end in tokens:
        if level:
            label = str(CEFRLevel(round(level)))
        elif word.isalpha():
            label = "SKIP" if skipped else "UNKNOWN"
        else:
            # Nothing to highlight for tokens with no level that are not
            # purely alphabetic (punctuation, numbers, ...).
            continue

        ents.append({"start": start, "end": end, "label": label})

    return {"text": text, "ents": ents}
97
+
98
+
99
def render_visualization(text: str, ents_to_skip: list[str]) -> str:
    """Render ``text`` as displaCy HTML with each word highlighted by label.

    The text is parsed with the module-level spaCy pipeline, analysed with a
    ``CEFRSpaCyAnalyzer`` configured from ``ents_to_skip`` and
    ``ABBREVIATION_MAPPING``, converted to spans by ``get_dict_ents`` and
    rendered with displaCy's manual "ent" style.

    Args:
        text: Input text. ``None`` is treated as an empty string.
        ents_to_skip: Entity labels forwarded to the analyzer as
            ``entity_types_to_skip``.

    Returns:
        The HTML markup produced by ``displacy.render``.
    """
    # Robustness: a missing value (e.g. possibly a cleared input component --
    # TODO confirm what Gradio sends) must not reach nlp() as None.
    text = text or ""

    doc = nlp(text)

    # A new analyzer per call, because the skip list comes from the request.
    text_analyzer = CEFRSpaCyAnalyzer(
        entity_types_to_skip=ents_to_skip,
        abbreviation_mapping=ABBREVIATION_MAPPING,
    )
    # "analize_doc" is the spelling used by this file; presumably it matches
    # the cefrpy API -- do not "correct" it without checking the library.
    tokens = text_analyzer.analize_doc(doc)
    dict_ents = get_dict_ents(text, tokens)

    return displacy.render(dict_ents, manual=True, style="ent", options=DISPLACY_RENDER_OPTIONS)
108
+
109
+
110
# Gradio UI definition.
# NOTE(review): the source this was restored from had lost all indentation,
# so the nesting of the ``with`` blocks below is reconstructed from statement
# order -- verify the layout against the running app.
demo = gr.Blocks(css=CSS)

with demo:
    with gr.Row(variant="default"):
        # Input side: title, links, text box, entity filter, action buttons.
        with gr.Group():
            with gr.Column():
                with gr.Row():
                    gr.Markdown("# Gradio Demo: cefrpy")
                    gr.HTML(LINKS_HTML)

                with gr.Row():
                    text_input = gr.TextArea(
                        value=DEFAULT_TEXT,
                        interactive=True,
                        max_lines=500,
                        label="Input Text",
                        show_copy_button=True
                    )

                with gr.Row():
                    ent_input = gr.CheckboxGroup(
                        ALL_ENTS,
                        value=DEFAULT_ENTITY_ITEMS_TO_SKIP,
                        label="Entity types to skip CEFR"
                    )

                with gr.Row():
                    clear_button = gr.ClearButton(text_input)

                    render_button = gr.Button(
                        "Render",
                        variant="primary"
                    )

        # Output side: rendered visualization.
        with gr.Group():
            with gr.Row():
                gr.Markdown("# Words CEFR level visualization")

            with gr.Row():
                # Pre-render the sample text so the page is not empty on load.
                render_output = gr.HTML(
                    value=render_visualization(DEFAULT_TEXT, DEFAULT_ENTITY_ITEMS_TO_SKIP),
                )

    # Re-render with the current text and entity selection on click.
    render_button.click(
        render_visualization,
        inputs=[text_input, ent_input],
        outputs=render_output
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ cefrpy
3
+ spacy
4
+
5
+ https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl