Spaces:
Running
Running
Add application file
Browse files- app.py +159 -0
- requirements.txt +5 -0
app.py
ADDED
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import spacy
|
3 |
+
from spacy import displacy
|
4 |
+
from cefrpy import CEFRSpaCyAnalyzer, CEFRLevel
|
5 |
+
|
6 |
+
# Name of the spaCy pipeline handed to ``spacy.load`` at start-up.
MODEL = "en_core_web_sm"

# Entity labels offered as choices in the "Entity types to skip CEFR"
# checkbox group; list order is the order the choices are passed in.
ALL_ENTS = [
    "CARDINAL",
    "DATE",
    "EVENT",
    "FAC",
    "GPE",
    "LANGUAGE",
    "LAW",
    "LOC",
    "MONEY",
    "NORP",
    "ORDINAL",
    "ORG",
    "PERCENT",
    "PERSON",
    "PRODUCT",
    "QUANTITY",
    "TIME",
    "WORK_OF_ART",
]

# Entity labels pre-selected in that checkbox group and used for the
# initial rendering of the default text.
DEFAULT_ENTITY_ITEMS_TO_SKIP = [
    "QUANTITY",
    "MONEY",
    "LANGUAGE",
    "LAW",
    "WORK_OF_ART",
    "PRODUCT",
    "GPE",
    "ORG",
    "FAC",
    "PERSON",
]
|
19 |
+
|
20 |
+
DEFAULT_TEXT = """The world's oldest known recipe is for beer. It dates back to around 5,000 BC and was found in ancient Sumeria (modern-day Iraq).
|
21 |
+
|
22 |
+
Did you know that the shortest war in history lasted only 38 minutes? It occurred between Britain and Zanzibar on August 27, 1896. Zanzibar surrendered after the British issued an ultimatum to end their rule.
|
23 |
+
|
24 |
+
Some of the longest words in the English language can leave you tongue-tied! One example is "pneumonoultramicroscopicsilicovolcanoconiosis," which refers to a lung disease caused by inhaling very fine silica dust. With 45 letters, it's often cited as one of the longest words in English dictionaries. However, it's more of a curiosity than a practical term used in everyday language!
|
25 |
+
|
26 |
+
In 2006, a Coca-Cola employee offered to sell Coca-Cola secrets to Pepsi. Pepsi responded by notifying Coca-Cola, and the FBI set up a sting operation to catch the culprit.
|
27 |
+
|
28 |
+
Sir Isaac Newton, the renowned mathematician and physicist, invented the cat flap. While studying at Cambridge University, Newton had a pet cat named Spithead. He became annoyed when Spithead would interrupt his experiments, so he designed a small door in his study door through which the cat could come and go freely."""
|
29 |
+
|
30 |
+
# Options passed to ``displacy.render``: one highlight colour per span
# label. "SKIP" and "UNKNOWN" are the labels get_dict_ents assigns to
# alphabetic tokens without a level; the A1-C2 keys are presumably the
# string forms of CEFRLevel values -- confirm against cefrpy.
DISPLACY_RENDER_OPTIONS = {
    "colors": {
        "A1": "#b0c4de",
        "A2": "#87ceeb",
        "B1": "#90ee90",
        "B2": "#adff2f",
        "C1": "#ffd700",
        "C2": "#ff9380",
        "SKIP": "#ffafed",
        "UNKNOWN": "#BCAAA4",
    },
}

# Contraction suffix -> full word, supplied to CEFRSpaCyAnalyzer as its
# ``abbreviation_mapping`` argument.
ABBREVIATION_MAPPING = {
    "'m": "am",
    "'s": "is",
    "'re": "are",
    "'ve": "have",
    "'d": "had",
    "n't": "not",
    "'ll": "will",
}
|
52 |
+
|
53 |
+
LINKS_HTML = """
|
54 |
+
<p>
|
55 |
+
 Github: <a href="https://github.com/Maximax67/cefrpy">https://github.com/Maximax67/cefrpy</a><br>
|
56 |
+
 Docs: <a href="https://maximax67.github.io/cefrpy">https://maximax67.github.io/cefrpy</a><br>
|
57 |
+
</p>
|
58 |
+
"""
|
59 |
+
|
60 |
+
CSS = """
|
61 |
+
h1 {
|
62 |
+
padding-top: 5px;
|
63 |
+
text-align: center;
|
64 |
+
display:block;
|
65 |
+
}
|
66 |
+
|
67 |
+
.hide-container, .gr-group {
|
68 |
+
background: white !important;
|
69 |
+
}
|
70 |
+
"""
|
71 |
+
|
72 |
+
nlp = spacy.load(MODEL)
|
73 |
+
|
74 |
+
def get_dict_ents(text: str, tokens: list[tuple[str, str, bool, float, int, int]]) -> dict:
    """Build the manual-mode input dict that is handed to ``displacy.render``.

    Each token tuple is read by position: index 0 is the token text,
    index 2 the "skipped" flag, index 3 the level, and indices 4 and 5
    the start/end offsets of the span. Index 1 is not used here.

    Args:
        text: The text the offsets refer to; returned unchanged.
        tokens: Token tuples as described above.

    Returns:
        ``{"text": text, "ents": [...]}`` where every entry has ``start``,
        ``end`` and ``label`` keys. Tokens with a truthy level are labelled
        with ``str(CEFRLevel(round(level)))``; remaining alphabetic tokens
        get ``"SKIP"`` or ``"UNKNOWN"``; everything else is left out.
    """
    spans = []

    for tok in tokens:
        level = tok[3]
        if level:
            # Levels may be fractional, so round before converting.
            label = str(CEFRLevel(round(level)))
        elif not tok[0].isalpha():
            # No level and not a word (punctuation, digits, ...): no span.
            continue
        elif tok[2]:
            label = "SKIP"
        else:
            label = "UNKNOWN"

        spans.append({"start": tok[4], "end": tok[5], "label": label})

    return {"text": text, "ents": spans}
|
97 |
+
|
98 |
+
|
99 |
+
def render_visualization(text: str, ents_to_skip: list[str]) -> str:
    """Render ``text`` as displaCy entity-style HTML labelled per word.

    Args:
        text: Raw input text; parsed with the module-level ``nlp`` pipeline.
        ents_to_skip: Entity labels forwarded to ``CEFRSpaCyAnalyzer`` as
            ``entity_types_to_skip``.

    Returns:
        The markup produced by ``displacy.render`` in manual ``"ent"`` mode.
    """
    parsed = nlp(text)

    # A new analyzer per call, since the skip list is a per-request input.
    analyzer = CEFRSpaCyAnalyzer(
        entity_types_to_skip=ents_to_skip,
        abbreviation_mapping=ABBREVIATION_MAPPING,
    )
    # ``analize_doc`` is the method name as called by this app (sic).
    spans = get_dict_ents(text, analyzer.analize_doc(parsed))

    return displacy.render(
        spans,
        manual=True,
        style="ent",
        options=DISPLACY_RENDER_OPTIONS,
    )
|
108 |
+
|
109 |
+
|
110 |
+
demo = gr.Blocks(css=CSS)
|
111 |
+
|
112 |
+
with demo:
|
113 |
+
with gr.Row(variant="default"):
|
114 |
+
with gr.Group():
|
115 |
+
with gr.Column():
|
116 |
+
with gr.Row():
|
117 |
+
gr.Markdown("# Gradio Demo: cefrpy")
|
118 |
+
gr.HTML(LINKS_HTML)
|
119 |
+
|
120 |
+
with gr.Row():
|
121 |
+
text_input = gr.TextArea(
|
122 |
+
value=DEFAULT_TEXT,
|
123 |
+
interactive=True,
|
124 |
+
max_lines=500,
|
125 |
+
label="Input Text",
|
126 |
+
show_copy_button=True
|
127 |
+
)
|
128 |
+
|
129 |
+
with gr.Row():
|
130 |
+
ent_input = gr.CheckboxGroup(
|
131 |
+
ALL_ENTS,
|
132 |
+
value=DEFAULT_ENTITY_ITEMS_TO_SKIP,
|
133 |
+
label="Entity types to skip CEFR"
|
134 |
+
)
|
135 |
+
|
136 |
+
with gr.Row():
|
137 |
+
clear_button = gr.ClearButton(text_input)
|
138 |
+
|
139 |
+
render_button = gr.Button(
|
140 |
+
"Render",
|
141 |
+
variant="primary"
|
142 |
+
)
|
143 |
+
|
144 |
+
with gr.Group():
|
145 |
+
with gr.Row():
|
146 |
+
gr.Markdown("# Words CEFR level visualization")
|
147 |
+
|
148 |
+
with gr.Row():
|
149 |
+
render_output = gr.HTML(
|
150 |
+
value=render_visualization(DEFAULT_TEXT, DEFAULT_ENTITY_ITEMS_TO_SKIP),
|
151 |
+
)
|
152 |
+
|
153 |
+
render_button.click(
|
154 |
+
render_visualization,
|
155 |
+
inputs=[text_input, ent_input],
|
156 |
+
outputs=render_output
|
157 |
+
)
|
158 |
+
|
159 |
+
demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
cefrpy
|
3 |
+
spacy
|
4 |
+
|
5 |
+
https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
|