Spaces:
Runtime error
Runtime error
Victoria Slocum
committed on
Commit
•
a327de9
1
Parent(s):
8287126
Update: Add model type
Browse files- README.md +1 -1
- app.py +104 -96
- requirements.txt +1 -1
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
emoji: π
|
4 |
colorFrom: green
|
5 |
colorTo: gray
|
|
|
1 |
---
|
2 |
+
title: Gradio Pipeline Visualizer
|
3 |
emoji: π
|
4 |
colorFrom: green
|
5 |
colorTo: gray
|
app.py
CHANGED
@@ -41,42 +41,29 @@ def download_svg(svg):
|
|
41 |
html = f'<a download="displacy.svg" href="{img}" style="{button_css}">Download as SVG</a>'
|
42 |
return html
|
43 |
|
44 |
-
# def download_png(svg):
|
45 |
-
# encode = base64.b64encode(bytes(svg, 'utf-8'))
|
46 |
-
# svg_uri = 'data:image/svg+xml;base64,' + str(encode)[2:-1]
|
47 |
-
# output = cairosvg.svg2png(url=svg_uri)
|
48 |
-
# encoded = base64.b64encode(output)
|
49 |
-
# img = 'data:image/png;base64,' + str(encoded)[2:-1]
|
50 |
-
# html = f'<a download="displacy.png" href="{img}" style="{button_css}">Download as PNG</a>'
|
51 |
-
# return html
|
52 |
-
|
53 |
-
# def download(type, svg):
|
54 |
-
# if type == 'png':
|
55 |
-
# return download_png(svg)
|
56 |
-
# elif type == 'svg':
|
57 |
-
# return download_svg(svg)
|
58 |
-
|
59 |
def dependency(text, col_punct, col_phrase, compact, bg, font, model):
|
60 |
-
|
|
|
61 |
doc = nlp(text)
|
62 |
options = {"compact": compact, "collapse_phrases": col_phrase,
|
63 |
"collapse_punct": col_punct, "bg": bg, "color": font}
|
64 |
svg = displacy.render(doc, style="dep", options=options)
|
65 |
download = download_svg(svg)
|
66 |
-
return svg, download
|
67 |
|
68 |
|
69 |
def entity(text, ents, model):
|
70 |
-
|
|
|
71 |
doc = nlp(text)
|
72 |
options = {"ents": ents}
|
73 |
svg = displacy.render(doc, style="ent", options=options)
|
74 |
-
|
75 |
-
return svg
|
76 |
|
77 |
|
78 |
def token(text, attributes, model):
|
79 |
-
|
|
|
80 |
data = []
|
81 |
doc = nlp(text)
|
82 |
for tok in doc:
|
@@ -85,11 +72,12 @@ def token(text, attributes, model):
|
|
85 |
tok_data.append(getattr(tok, attr))
|
86 |
data.append(tok_data)
|
87 |
data = pd.DataFrame(data, columns=attributes)
|
88 |
-
return data
|
89 |
|
90 |
|
91 |
def default_token(text, attributes, model):
|
92 |
-
|
|
|
93 |
data = []
|
94 |
doc = nlp(text)
|
95 |
for tok in doc:
|
@@ -97,27 +85,30 @@ def default_token(text, attributes, model):
|
|
97 |
for attr in attributes:
|
98 |
tok_data.append(getattr(tok, attr))
|
99 |
data.append(tok_data)
|
100 |
-
return data
|
101 |
|
102 |
|
103 |
def random_vectors(text, model):
|
104 |
-
|
|
|
105 |
doc = nlp(text)
|
106 |
n_chunks = [chunk for chunk in doc.noun_chunks if doc.noun_chunks]
|
107 |
words = [tok for tok in doc if not tok.is_stop and tok.pos_ not in [
|
108 |
'PUNCT', "PROPN"]]
|
109 |
str_list = n_chunks + words
|
110 |
choice = random.choices(str_list, k=2)
|
111 |
-
return round(choice[0].similarity(choice[1]), 2), choice[0].text, choice[1].text
|
112 |
|
113 |
|
114 |
def vectors(input1, input2, model):
|
115 |
-
|
116 |
-
|
|
|
117 |
|
118 |
|
119 |
def span(text, span1, span2, label1, label2, model):
|
120 |
-
|
|
|
121 |
doc = nlp(text)
|
122 |
if span1:
|
123 |
idx1_1 = 0
|
@@ -155,8 +146,7 @@ def span(text, span1, span2, label1, label2, model):
|
|
155 |
]
|
156 |
|
157 |
svg = displacy.render(doc, style="span")
|
158 |
-
|
159 |
-
return svg
|
160 |
|
161 |
|
162 |
def get_text(model):
|
@@ -200,13 +190,21 @@ with demo:
|
|
200 |
with gr.Column():
|
201 |
gr.Markdown("")
|
202 |
|
203 |
-
button = gr.Button("
|
204 |
with gr.Box():
|
205 |
with gr.Column():
|
206 |
-
gr.
|
207 |
-
|
208 |
-
|
209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
210 |
with gr.Row():
|
211 |
with gr.Column():
|
212 |
col_punct = gr.Checkbox(
|
@@ -221,43 +219,47 @@ with demo:
|
|
221 |
text = gr.Textbox(
|
222 |
label="Text Color", value="black")
|
223 |
|
224 |
-
|
225 |
DEFAULT_TEXT, True, True, False, DEFAULT_COLOR, "black", DEFAULT_MODEL)[0])
|
226 |
with gr.Row():
|
227 |
with gr.Column():
|
228 |
-
dep_button = gr.Button("
|
229 |
with gr.Column():
|
230 |
-
dep_download_button = gr.HTML(value=download_svg(
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
gr.Markdown(" ")
|
235 |
with gr.Box():
|
236 |
with gr.Column():
|
237 |
-
gr.
|
238 |
-
|
239 |
-
|
240 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
ent_input = gr.CheckboxGroup(
|
242 |
DEFAULT_ENTS, value=DEFAULT_ENTS)
|
243 |
ent_output = gr.HTML(value=entity(
|
244 |
-
DEFAULT_TEXT, DEFAULT_ENTS, DEFAULT_MODEL))
|
245 |
-
ent_button = gr.Button("
|
246 |
-
# with gr.Row():
|
247 |
-
# with gr.Column():
|
248 |
-
# ent_button = gr.Button("Generate Entity Recognizer", variant="primary")
|
249 |
-
# with gr.Column():
|
250 |
-
# ent_download_button = gr.HTML(value=download_svg(ent_output.value))
|
251 |
-
# with gr.Column():
|
252 |
-
# gr.Markdown(" ")
|
253 |
-
# with gr.Column():
|
254 |
-
# gr.Markdown(" ")
|
255 |
with gr.Box():
|
256 |
with gr.Column():
|
257 |
-
gr.
|
258 |
-
|
259 |
-
|
260 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
261 |
with gr.Row():
|
262 |
with gr.Column():
|
263 |
tok_input = gr.CheckboxGroup(
|
@@ -265,14 +267,22 @@ with demo:
|
|
265 |
with gr.Column():
|
266 |
gr.Markdown("")
|
267 |
tok_output = gr.Dataframe(headers=DEFAULT_TOK_ATTR, value=default_token(
|
268 |
-
DEFAULT_TEXT, DEFAULT_TOK_ATTR, DEFAULT_MODEL), overflow_row_behaviour="paginate")
|
269 |
-
tok_button = gr.Button("
|
270 |
with gr.Box():
|
271 |
with gr.Column():
|
272 |
-
gr.
|
273 |
-
|
274 |
-
|
275 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
with gr.Row():
|
277 |
with gr.Column():
|
278 |
sim_text1 = gr.Textbox(
|
@@ -285,15 +295,22 @@ with demo:
|
|
285 |
label="Similarity Score", value="0.12")
|
286 |
with gr.Column():
|
287 |
gr.Markdown("")
|
288 |
-
sim_random_button = gr.Button("
|
289 |
-
sim_button = gr.Button("
|
290 |
with gr.Box():
|
291 |
with gr.Column():
|
292 |
-
gr.
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
297 |
with gr.Row():
|
298 |
with gr.Column():
|
299 |
span1 = gr.Textbox(
|
@@ -317,39 +334,30 @@ with demo:
|
|
317 |
with gr.Column():
|
318 |
gr.Markdown("")
|
319 |
span_output = gr.HTML(value=span(
|
320 |
-
DEFAULT_TEXT, "U.K. startup", "U.K.", "ORG", "GPE", DEFAULT_MODEL))
|
321 |
-
span_button = gr.Button("
|
322 |
-
# with gr.Row():
|
323 |
-
# with gr.Column():
|
324 |
-
# span_button = gr.Button("Generate Spans", variant="primary")
|
325 |
-
# with gr.Column():
|
326 |
-
# span_download_button = gr.HTML(value=download_svg(span_output.value))
|
327 |
-
# with gr.Column():
|
328 |
-
# gr.Markdown(" ")
|
329 |
-
# with gr.Column():
|
330 |
-
# gr.Markdown(" ")
|
331 |
|
332 |
model_input.change(get_text, inputs=[model_input], outputs=text_input)
|
333 |
button.click(dependency, inputs=[
|
334 |
-
text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=[
|
335 |
button.click(
|
336 |
-
entity, inputs=[text_input, ent_input, model_input], outputs=[ent_output])
|
337 |
button.click(
|
338 |
-
token, inputs=[text_input, tok_input, model_input], outputs=tok_output)
|
339 |
button.click(vectors, inputs=[sim_text1,
|
340 |
-
sim_text2, model_input], outputs=sim_output)
|
341 |
button.click(
|
342 |
-
span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=[span_output])
|
343 |
dep_button.click(dependency, inputs=[
|
344 |
-
text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=[
|
345 |
ent_button.click(
|
346 |
-
entity, inputs=[text_input, ent_input, model_input], outputs=[ent_output])
|
347 |
tok_button.click(
|
348 |
-
token, inputs=[text_input, tok_input, model_input], outputs=[tok_output])
|
349 |
sim_button.click(vectors, inputs=[
|
350 |
-
sim_text1, sim_text2, model_input], outputs=sim_output)
|
351 |
span_button.click(
|
352 |
-
span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=[span_output])
|
353 |
sim_random_button.click(random_vectors, inputs=[text_input, model_input], outputs=[
|
354 |
-
sim_output, sim_text1, sim_text2])
|
355 |
demo.launch()
|
|
|
41 |
html = f'<a download="displacy.svg" href="{img}" style="{button_css}">Download as SVG</a>'
|
42 |
return html
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
def dependency(text, col_punct, col_phrase, compact, bg, font, model):
|
45 |
+
model_name = model + "_sm"
|
46 |
+
nlp = spacy.load(model_name)
|
47 |
doc = nlp(text)
|
48 |
options = {"compact": compact, "collapse_phrases": col_phrase,
|
49 |
"collapse_punct": col_punct, "bg": bg, "color": font}
|
50 |
svg = displacy.render(doc, style="dep", options=options)
|
51 |
download = download_svg(svg)
|
52 |
+
return svg, download, model_name
|
53 |
|
54 |
|
55 |
def entity(text, ents, model):
|
56 |
+
model_name = model + "_sm"
|
57 |
+
nlp = spacy.load(model_name)
|
58 |
doc = nlp(text)
|
59 |
options = {"ents": ents}
|
60 |
svg = displacy.render(doc, style="ent", options=options)
|
61 |
+
return svg, model_name
|
|
|
62 |
|
63 |
|
64 |
def token(text, attributes, model):
|
65 |
+
model_name = model + "_sm"
|
66 |
+
nlp = spacy.load(model_name)
|
67 |
data = []
|
68 |
doc = nlp(text)
|
69 |
for tok in doc:
|
|
|
72 |
tok_data.append(getattr(tok, attr))
|
73 |
data.append(tok_data)
|
74 |
data = pd.DataFrame(data, columns=attributes)
|
75 |
+
return data, model_name
|
76 |
|
77 |
|
78 |
def default_token(text, attributes, model):
|
79 |
+
model_name = model + "_sm"
|
80 |
+
nlp = spacy.load(model_name)
|
81 |
data = []
|
82 |
doc = nlp(text)
|
83 |
for tok in doc:
|
|
|
85 |
for attr in attributes:
|
86 |
tok_data.append(getattr(tok, attr))
|
87 |
data.append(tok_data)
|
88 |
+
return data, model_name
|
89 |
|
90 |
|
91 |
def random_vectors(text, model):
|
92 |
+
model_name = model + "_md"
|
93 |
+
nlp = spacy.load(model_name)
|
94 |
doc = nlp(text)
|
95 |
n_chunks = [chunk for chunk in doc.noun_chunks if doc.noun_chunks]
|
96 |
words = [tok for tok in doc if not tok.is_stop and tok.pos_ not in [
|
97 |
'PUNCT', "PROPN"]]
|
98 |
str_list = n_chunks + words
|
99 |
choice = random.choices(str_list, k=2)
|
100 |
+
return round(choice[0].similarity(choice[1]), 2), choice[0].text, choice[1].text, model_name
|
101 |
|
102 |
|
103 |
def vectors(input1, input2, model):
|
104 |
+
model_name = model + "_md"
|
105 |
+
nlp = spacy.load(model_name)
|
106 |
+
return round(nlp(input1).similarity(nlp(input2)), 2), model_name
|
107 |
|
108 |
|
109 |
def span(text, span1, span2, label1, label2, model):
|
110 |
+
model_name = model + "_sm"
|
111 |
+
nlp = spacy.load(model_name)
|
112 |
doc = nlp(text)
|
113 |
if span1:
|
114 |
idx1_1 = 0
|
|
|
146 |
]
|
147 |
|
148 |
svg = displacy.render(doc, style="span")
|
149 |
+
return svg, model_name
|
|
|
150 |
|
151 |
|
152 |
def get_text(model):
|
|
|
190 |
with gr.Column():
|
191 |
gr.Markdown("")
|
192 |
|
193 |
+
button = gr.Button("Update", variant="primary")
|
194 |
with gr.Box():
|
195 |
with gr.Column():
|
196 |
+
with gr.Row():
|
197 |
+
with gr.Column():
|
198 |
+
gr.Markdown(
|
199 |
+
"## [Dependency Parser](https://spacy.io/usage/visualizers#dep)")
|
200 |
+
gr.Markdown(
|
201 |
+
"The dependency visualizer shows part-of-speech tags and syntactic dependencies")
|
202 |
+
with gr.Column():
|
203 |
+
with gr.Row():
|
204 |
+
with gr.Column():
|
205 |
+
gr.Markdown(" ")
|
206 |
+
with gr.Column():
|
207 |
+
dep_model = gr.Textbox(label="Model", value="en_core_web_sm")
|
208 |
with gr.Row():
|
209 |
with gr.Column():
|
210 |
col_punct = gr.Checkbox(
|
|
|
219 |
text = gr.Textbox(
|
220 |
label="Text Color", value="black")
|
221 |
|
222 |
+
dep_output = gr.HTML(value=dependency(
|
223 |
DEFAULT_TEXT, True, True, False, DEFAULT_COLOR, "black", DEFAULT_MODEL)[0])
|
224 |
with gr.Row():
|
225 |
with gr.Column():
|
226 |
+
dep_button = gr.Button("Update Dependency Parser", variant="primary")
|
227 |
with gr.Column():
|
228 |
+
dep_download_button = gr.HTML(value=download_svg(dep_output.value))
|
|
|
|
|
|
|
229 |
gr.Markdown(" ")
|
230 |
with gr.Box():
|
231 |
with gr.Column():
|
232 |
+
with gr.Row():
|
233 |
+
with gr.Column():
|
234 |
+
gr.Markdown(
|
235 |
+
"## [Entity Recognizer](https://spacy.io/usage/visualizers#ent)")
|
236 |
+
gr.Markdown(
|
237 |
+
"The entity visualizer highlights named entities and their labels in a text")
|
238 |
+
with gr.Column():
|
239 |
+
with gr.Row():
|
240 |
+
with gr.Column():
|
241 |
+
gr.Markdown(" ")
|
242 |
+
with gr.Column():
|
243 |
+
ent_model = gr.Textbox(label="Model", value="en_core_web_sm")
|
244 |
ent_input = gr.CheckboxGroup(
|
245 |
DEFAULT_ENTS, value=DEFAULT_ENTS)
|
246 |
ent_output = gr.HTML(value=entity(
|
247 |
+
DEFAULT_TEXT, DEFAULT_ENTS, DEFAULT_MODEL)[0])
|
248 |
+
ent_button = gr.Button("Update Entity Recognizer", variant="primary")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
249 |
with gr.Box():
|
250 |
with gr.Column():
|
251 |
+
with gr.Row():
|
252 |
+
with gr.Column():
|
253 |
+
gr.Markdown(
|
254 |
+
"## [Token Properties](https://spacy.io/usage/linguistic-features)")
|
255 |
+
gr.Markdown(
|
256 |
+
"When you put in raw text to spaCy, it returns a Doc object with different linguistic features")
|
257 |
+
with gr.Column():
|
258 |
+
with gr.Row():
|
259 |
+
with gr.Column():
|
260 |
+
gr.Markdown(" ")
|
261 |
+
with gr.Column():
|
262 |
+
tok_model = gr.Textbox(label="Model", value="en_core_web_sm")
|
263 |
with gr.Row():
|
264 |
with gr.Column():
|
265 |
tok_input = gr.CheckboxGroup(
|
|
|
267 |
with gr.Column():
|
268 |
gr.Markdown("")
|
269 |
tok_output = gr.Dataframe(headers=DEFAULT_TOK_ATTR, value=default_token(
|
270 |
+
DEFAULT_TEXT, DEFAULT_TOK_ATTR, DEFAULT_MODEL)[0], overflow_row_behaviour="paginate")
|
271 |
+
tok_button = gr.Button("Update Token Properties", variant="primary")
|
272 |
with gr.Box():
|
273 |
with gr.Column():
|
274 |
+
with gr.Row():
|
275 |
+
with gr.Column():
|
276 |
+
gr.Markdown(
|
277 |
+
"## [Word and Phrase Similarity](https://spacy.io/usage/linguistic-features#vectors-similarity)")
|
278 |
+
gr.Markdown(
|
279 |
+
"Words and spans have similarity ratings based off of their word vectors, or word embeddings")
|
280 |
+
with gr.Column():
|
281 |
+
with gr.Row():
|
282 |
+
with gr.Column():
|
283 |
+
gr.Markdown(" ")
|
284 |
+
with gr.Column():
|
285 |
+
sim_model = gr.Textbox(label="Model", value="en_core_web_md")
|
286 |
with gr.Row():
|
287 |
with gr.Column():
|
288 |
sim_text1 = gr.Textbox(
|
|
|
295 |
label="Similarity Score", value="0.12")
|
296 |
with gr.Column():
|
297 |
gr.Markdown("")
|
298 |
+
sim_random_button = gr.Button("Update random words")
|
299 |
+
sim_button = gr.Button("Update similarity", variant="primary")
|
300 |
with gr.Box():
|
301 |
with gr.Column():
|
302 |
+
with gr.Row():
|
303 |
+
with gr.Column():
|
304 |
+
gr.Markdown(
|
305 |
+
"## [Spans](https://spacy.io/usage/visualizers#span)")
|
306 |
+
gr.Markdown(
|
307 |
+
"The span visualizer highlights overlapping spans in a text")
|
308 |
+
with gr.Column():
|
309 |
+
with gr.Row():
|
310 |
+
with gr.Column():
|
311 |
+
gr.Markdown(" ")
|
312 |
+
with gr.Column():
|
313 |
+
span_model = gr.Textbox(label="Model", value="en_core_web_sm")
|
314 |
with gr.Row():
|
315 |
with gr.Column():
|
316 |
span1 = gr.Textbox(
|
|
|
334 |
with gr.Column():
|
335 |
gr.Markdown("")
|
336 |
span_output = gr.HTML(value=span(
|
337 |
+
DEFAULT_TEXT, "U.K. startup", "U.K.", "ORG", "GPE", DEFAULT_MODEL)[0])
|
338 |
+
span_button = gr.Button("Update Spans", variant="primary")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
339 |
|
340 |
model_input.change(get_text, inputs=[model_input], outputs=text_input)
|
341 |
button.click(dependency, inputs=[
|
342 |
+
text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=[dep_output, dep_download_button, dep_model])
|
343 |
button.click(
|
344 |
+
entity, inputs=[text_input, ent_input, model_input], outputs=[ent_output, ent_model])
|
345 |
button.click(
|
346 |
+
token, inputs=[text_input, tok_input, model_input], outputs=[tok_output, tok_model])
|
347 |
button.click(vectors, inputs=[sim_text1,
|
348 |
+
sim_text2, model_input], outputs=[sim_output, sim_model])
|
349 |
button.click(
|
350 |
+
span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=[span_output, span_model])
|
351 |
dep_button.click(dependency, inputs=[
|
352 |
+
text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=[dep_output, dep_download_button, dep_model])
|
353 |
ent_button.click(
|
354 |
+
entity, inputs=[text_input, ent_input, model_input], outputs=[ent_output, ent_model])
|
355 |
tok_button.click(
|
356 |
+
token, inputs=[text_input, tok_input, model_input], outputs=[tok_output, tok_model])
|
357 |
sim_button.click(vectors, inputs=[
|
358 |
+
sim_text1, sim_text2, model_input], outputs=[sim_output, sim_model])
|
359 |
span_button.click(
|
360 |
+
span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=[span_output, span_model])
|
361 |
sim_random_button.click(random_vectors, inputs=[text_input, model_input], outputs=[
|
362 |
+
sim_output, sim_text1, sim_text2, sim_model])
|
363 |
demo.launch()
|
requirements.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
pandas==1.4.2
|
2 |
gradio==3.0.18
|
3 |
-
spacy==3.
|
4 |
|
5 |
https://huggingface.co/spacy/ca_core_news_md/resolve/main/ca_core_news_md-any-py3-none-any.whl
|
6 |
https://huggingface.co/spacy/ca_core_news_sm/resolve/main/ca_core_news_sm-any-py3-none-any.whl
|
|
|
1 |
pandas==1.4.2
|
2 |
gradio==3.0.18
|
3 |
+
spacy==3.3.1
|
4 |
|
5 |
https://huggingface.co/spacy/ca_core_news_md/resolve/main/ca_core_news_md-any-py3-none-any.whl
|
6 |
https://huggingface.co/spacy/ca_core_news_sm/resolve/main/ca_core_news_sm-any-py3-none-any.whl
|