Victoria Slocum committed on
Commit
a327de9
‒
1 Parent(s): 8287126

Update: Add model type

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +104 -96
  3. requirements.txt +1 -1
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: spaCy Pipeline Visualizer
3
  emoji: 👀
4
  colorFrom: green
5
  colorTo: gray
 
1
  ---
2
+ title: Gradio Pipeline Visualizer
3
  emoji: 👀
4
  colorFrom: green
5
  colorTo: gray
app.py CHANGED
@@ -41,42 +41,29 @@ def download_svg(svg):
41
  html = f'<a download="displacy.svg" href="{img}" style="{button_css}">Download as SVG</a>'
42
  return html
43
 
44
- # def download_png(svg):
45
- # encode = base64.b64encode(bytes(svg, 'utf-8'))
46
- # svg_uri = 'data:image/svg+xml;base64,' + str(encode)[2:-1]
47
- # output = cairosvg.svg2png(url=svg_uri)
48
- # encoded = base64.b64encode(output)
49
- # img = 'data:image/png;base64,' + str(encoded)[2:-1]
50
- # html = f'<a download="displacy.png" href="{img}" style="{button_css}">Download as PNG</a>'
51
- # return html
52
-
53
- # def download(type, svg):
54
- # if type == 'png':
55
- # return download_png(svg)
56
- # elif type == 'svg':
57
- # return download_svg(svg)
58
-
59
  def dependency(text, col_punct, col_phrase, compact, bg, font, model):
60
- nlp = spacy.load(model + "_sm")
 
61
  doc = nlp(text)
62
  options = {"compact": compact, "collapse_phrases": col_phrase,
63
  "collapse_punct": col_punct, "bg": bg, "color": font}
64
  svg = displacy.render(doc, style="dep", options=options)
65
  download = download_svg(svg)
66
- return svg, download
67
 
68
 
69
  def entity(text, ents, model):
70
- nlp = spacy.load(model + "_sm")
 
71
  doc = nlp(text)
72
  options = {"ents": ents}
73
  svg = displacy.render(doc, style="ent", options=options)
74
- # download = download_svg('<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="en" id="97e9d3ac65344f2bb6e6ce517bd13b1e-0" class="displacy" width="1800" height="399.5" direction="ltr" style="max-width: none; height: 399.5px; color: black; font-family: Arial; direction: ltr">' + svg + "</svg>")
75
- return svg
76
 
77
 
78
  def token(text, attributes, model):
79
- nlp = spacy.load(model + "_sm")
 
80
  data = []
81
  doc = nlp(text)
82
  for tok in doc:
@@ -85,11 +72,12 @@ def token(text, attributes, model):
85
  tok_data.append(getattr(tok, attr))
86
  data.append(tok_data)
87
  data = pd.DataFrame(data, columns=attributes)
88
- return data
89
 
90
 
91
  def default_token(text, attributes, model):
92
- nlp = spacy.load(model + "_sm")
 
93
  data = []
94
  doc = nlp(text)
95
  for tok in doc:
@@ -97,27 +85,30 @@ def default_token(text, attributes, model):
97
  for attr in attributes:
98
  tok_data.append(getattr(tok, attr))
99
  data.append(tok_data)
100
- return data
101
 
102
 
103
  def random_vectors(text, model):
104
- nlp = spacy.load(model + "_md")
 
105
  doc = nlp(text)
106
  n_chunks = [chunk for chunk in doc.noun_chunks if doc.noun_chunks]
107
  words = [tok for tok in doc if not tok.is_stop and tok.pos_ not in [
108
  'PUNCT', "PROPN"]]
109
  str_list = n_chunks + words
110
  choice = random.choices(str_list, k=2)
111
- return round(choice[0].similarity(choice[1]), 2), choice[0].text, choice[1].text
112
 
113
 
114
  def vectors(input1, input2, model):
115
- nlp = spacy.load(model + "_md")
116
- return round(nlp(input1).similarity(nlp(input2)), 2)
 
117
 
118
 
119
  def span(text, span1, span2, label1, label2, model):
120
- nlp = spacy.load(model + "_sm")
 
121
  doc = nlp(text)
122
  if span1:
123
  idx1_1 = 0
@@ -155,8 +146,7 @@ def span(text, span1, span2, label1, label2, model):
155
  ]
156
 
157
  svg = displacy.render(doc, style="span")
158
- # download = download_svg(svg)
159
- return svg
160
 
161
 
162
  def get_text(model):
@@ -200,13 +190,21 @@ with demo:
200
  with gr.Column():
201
  gr.Markdown("")
202
 
203
- button = gr.Button("Generate", variant="primary")
204
  with gr.Box():
205
  with gr.Column():
206
- gr.Markdown(
207
- "## [Dependency Parser](https://spacy.io/usage/visualizers#dep)")
208
- gr.Markdown(
209
- "The dependency visualizer shows part-of-speech tags and syntactic dependencies")
 
 
 
 
 
 
 
 
210
  with gr.Row():
211
  with gr.Column():
212
  col_punct = gr.Checkbox(
@@ -221,43 +219,47 @@ with demo:
221
  text = gr.Textbox(
222
  label="Text Color", value="black")
223
 
224
- depen_output = gr.HTML(value=dependency(
225
  DEFAULT_TEXT, True, True, False, DEFAULT_COLOR, "black", DEFAULT_MODEL)[0])
226
  with gr.Row():
227
  with gr.Column():
228
- dep_button = gr.Button("Generate Dependency Parser", variant="primary")
229
  with gr.Column():
230
- dep_download_button = gr.HTML(value=download_svg(depen_output.value))
231
-
232
-
233
-
234
  gr.Markdown(" ")
235
  with gr.Box():
236
  with gr.Column():
237
- gr.Markdown(
238
- "## [Entity Recognizer](https://spacy.io/usage/visualizers#ent)")
239
- gr.Markdown(
240
- "The entity visualizer highlights named entities and their labels in a text")
 
 
 
 
 
 
 
 
241
  ent_input = gr.CheckboxGroup(
242
  DEFAULT_ENTS, value=DEFAULT_ENTS)
243
  ent_output = gr.HTML(value=entity(
244
- DEFAULT_TEXT, DEFAULT_ENTS, DEFAULT_MODEL))
245
- ent_button = gr.Button("Generate Entity Recognizer", variant="primary")
246
- # with gr.Row():
247
- # with gr.Column():
248
- # ent_button = gr.Button("Generate Entity Recognizer", variant="primary")
249
- # with gr.Column():
250
- # ent_download_button = gr.HTML(value=download_svg(ent_output.value))
251
- # with gr.Column():
252
- # gr.Markdown(" ")
253
- # with gr.Column():
254
- # gr.Markdown(" ")
255
  with gr.Box():
256
  with gr.Column():
257
- gr.Markdown(
258
- "## [Token Properties](https://spacy.io/usage/linguistic-features)")
259
- gr.Markdown(
260
- "When you put in raw text to spaCy, it returns a Doc object with different linguistic features")
 
 
 
 
 
 
 
 
261
  with gr.Row():
262
  with gr.Column():
263
  tok_input = gr.CheckboxGroup(
@@ -265,14 +267,22 @@ with demo:
265
  with gr.Column():
266
  gr.Markdown("")
267
  tok_output = gr.Dataframe(headers=DEFAULT_TOK_ATTR, value=default_token(
268
- DEFAULT_TEXT, DEFAULT_TOK_ATTR, DEFAULT_MODEL), overflow_row_behaviour="paginate")
269
- tok_button = gr.Button("Generate Token Properties", variant="primary")
270
  with gr.Box():
271
  with gr.Column():
272
- gr.Markdown(
273
- "## [Word and Phrase Similarity](https://spacy.io/usage/linguistic-features#vectors-similarity)")
274
- gr.Markdown(
275
- "Words and spans have similarity ratings based off of their word vectors, or word embeddings")
 
 
 
 
 
 
 
 
276
  with gr.Row():
277
  with gr.Column():
278
  sim_text1 = gr.Textbox(
@@ -285,15 +295,22 @@ with demo:
285
  label="Similarity Score", value="0.12")
286
  with gr.Column():
287
  gr.Markdown("")
288
- sim_random_button = gr.Button("Generate random words")
289
- sim_button = gr.Button("Generate similarity", variant="primary")
290
  with gr.Box():
291
  with gr.Column():
292
- gr.Markdown(
293
- "## [Spans](https://spacy.io/usage/visualizers#span)")
294
- gr.Markdown(
295
- "The span visualizer highlights overlapping spans in a text")
296
-
 
 
 
 
 
 
 
297
  with gr.Row():
298
  with gr.Column():
299
  span1 = gr.Textbox(
@@ -317,39 +334,30 @@ with demo:
317
  with gr.Column():
318
  gr.Markdown("")
319
  span_output = gr.HTML(value=span(
320
- DEFAULT_TEXT, "U.K. startup", "U.K.", "ORG", "GPE", DEFAULT_MODEL))
321
- span_button = gr.Button("Generate Spans", variant="primary")
322
- # with gr.Row():
323
- # with gr.Column():
324
- # span_button = gr.Button("Generate Spans", variant="primary")
325
- # with gr.Column():
326
- # span_download_button = gr.HTML(value=download_svg(span_output.value))
327
- # with gr.Column():
328
- # gr.Markdown(" ")
329
- # with gr.Column():
330
- # gr.Markdown(" ")
331
 
332
  model_input.change(get_text, inputs=[model_input], outputs=text_input)
333
  button.click(dependency, inputs=[
334
- text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=[depen_output, dep_download_button])
335
  button.click(
336
- entity, inputs=[text_input, ent_input, model_input], outputs=[ent_output])
337
  button.click(
338
- token, inputs=[text_input, tok_input, model_input], outputs=tok_output)
339
  button.click(vectors, inputs=[sim_text1,
340
- sim_text2, model_input], outputs=sim_output)
341
  button.click(
342
- span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=[span_output])
343
  dep_button.click(dependency, inputs=[
344
- text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=[depen_output, dep_download_button])
345
  ent_button.click(
346
- entity, inputs=[text_input, ent_input, model_input], outputs=[ent_output])
347
  tok_button.click(
348
- token, inputs=[text_input, tok_input, model_input], outputs=[tok_output])
349
  sim_button.click(vectors, inputs=[
350
- sim_text1, sim_text2, model_input], outputs=sim_output)
351
  span_button.click(
352
- span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=[span_output])
353
  sim_random_button.click(random_vectors, inputs=[text_input, model_input], outputs=[
354
- sim_output, sim_text1, sim_text2])
355
  demo.launch()
 
41
  html = f'<a download="displacy.svg" href="{img}" style="{button_css}">Download as SVG</a>'
42
  return html
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  def dependency(text, col_punct, col_phrase, compact, bg, font, model):
45
+ model_name = model + "_sm"
46
+ nlp = spacy.load(model_name)
47
  doc = nlp(text)
48
  options = {"compact": compact, "collapse_phrases": col_phrase,
49
  "collapse_punct": col_punct, "bg": bg, "color": font}
50
  svg = displacy.render(doc, style="dep", options=options)
51
  download = download_svg(svg)
52
+ return svg, download, model_name
53
 
54
 
55
  def entity(text, ents, model):
56
+ model_name = model + "_sm"
57
+ nlp = spacy.load(model_name)
58
  doc = nlp(text)
59
  options = {"ents": ents}
60
  svg = displacy.render(doc, style="ent", options=options)
61
+ return svg, model_name
 
62
 
63
 
64
  def token(text, attributes, model):
65
+ model_name = model + "_sm"
66
+ nlp = spacy.load(model_name)
67
  data = []
68
  doc = nlp(text)
69
  for tok in doc:
 
72
  tok_data.append(getattr(tok, attr))
73
  data.append(tok_data)
74
  data = pd.DataFrame(data, columns=attributes)
75
+ return data, model_name
76
 
77
 
78
  def default_token(text, attributes, model):
79
+ model_name = model + "_sm"
80
+ nlp = spacy.load(model_name)
81
  data = []
82
  doc = nlp(text)
83
  for tok in doc:
 
85
  for attr in attributes:
86
  tok_data.append(getattr(tok, attr))
87
  data.append(tok_data)
88
+ return data, model_name
89
 
90
 
91
  def random_vectors(text, model):
92
+ model_name = model + "_md"
93
+ nlp = spacy.load(model_name)
94
  doc = nlp(text)
95
  n_chunks = [chunk for chunk in doc.noun_chunks if doc.noun_chunks]
96
  words = [tok for tok in doc if not tok.is_stop and tok.pos_ not in [
97
  'PUNCT', "PROPN"]]
98
  str_list = n_chunks + words
99
  choice = random.choices(str_list, k=2)
100
+ return round(choice[0].similarity(choice[1]), 2), choice[0].text, choice[1].text, model_name
101
 
102
 
103
  def vectors(input1, input2, model):
104
+ model_name = model + "_md"
105
+ nlp = spacy.load(model_name)
106
+ return round(nlp(input1).similarity(nlp(input2)), 2), model_name
107
 
108
 
109
  def span(text, span1, span2, label1, label2, model):
110
+ model_name = model + "_sm"
111
+ nlp = spacy.load(model_name)
112
  doc = nlp(text)
113
  if span1:
114
  idx1_1 = 0
 
146
  ]
147
 
148
  svg = displacy.render(doc, style="span")
149
+ return svg, model_name
 
150
 
151
 
152
  def get_text(model):
 
190
  with gr.Column():
191
  gr.Markdown("")
192
 
193
+ button = gr.Button("Update", variant="primary")
194
  with gr.Box():
195
  with gr.Column():
196
+ with gr.Row():
197
+ with gr.Column():
198
+ gr.Markdown(
199
+ "## [Dependency Parser](https://spacy.io/usage/visualizers#dep)")
200
+ gr.Markdown(
201
+ "The dependency visualizer shows part-of-speech tags and syntactic dependencies")
202
+ with gr.Column():
203
+ with gr.Row():
204
+ with gr.Column():
205
+ gr.Markdown(" ")
206
+ with gr.Column():
207
+ dep_model = gr.Textbox(label="Model", value="en_core_web_sm")
208
  with gr.Row():
209
  with gr.Column():
210
  col_punct = gr.Checkbox(
 
219
  text = gr.Textbox(
220
  label="Text Color", value="black")
221
 
222
+ dep_output = gr.HTML(value=dependency(
223
  DEFAULT_TEXT, True, True, False, DEFAULT_COLOR, "black", DEFAULT_MODEL)[0])
224
  with gr.Row():
225
  with gr.Column():
226
+ dep_button = gr.Button("Update Dependency Parser", variant="primary")
227
  with gr.Column():
228
+ dep_download_button = gr.HTML(value=download_svg(dep_output.value))
 
 
 
229
  gr.Markdown(" ")
230
  with gr.Box():
231
  with gr.Column():
232
+ with gr.Row():
233
+ with gr.Column():
234
+ gr.Markdown(
235
+ "## [Entity Recognizer](https://spacy.io/usage/visualizers#ent)")
236
+ gr.Markdown(
237
+ "The entity visualizer highlights named entities and their labels in a text")
238
+ with gr.Column():
239
+ with gr.Row():
240
+ with gr.Column():
241
+ gr.Markdown(" ")
242
+ with gr.Column():
243
+ ent_model = gr.Textbox(label="Model", value="en_core_web_sm")
244
  ent_input = gr.CheckboxGroup(
245
  DEFAULT_ENTS, value=DEFAULT_ENTS)
246
  ent_output = gr.HTML(value=entity(
247
+ DEFAULT_TEXT, DEFAULT_ENTS, DEFAULT_MODEL)[0])
248
+ ent_button = gr.Button("Update Entity Recognizer", variant="primary")
 
 
 
 
 
 
 
 
 
249
  with gr.Box():
250
  with gr.Column():
251
+ with gr.Row():
252
+ with gr.Column():
253
+ gr.Markdown(
254
+ "## [Token Properties](https://spacy.io/usage/linguistic-features)")
255
+ gr.Markdown(
256
+ "When you put in raw text to spaCy, it returns a Doc object with different linguistic features")
257
+ with gr.Column():
258
+ with gr.Row():
259
+ with gr.Column():
260
+ gr.Markdown(" ")
261
+ with gr.Column():
262
+ tok_model = gr.Textbox(label="Model", value="en_core_web_sm")
263
  with gr.Row():
264
  with gr.Column():
265
  tok_input = gr.CheckboxGroup(
 
267
  with gr.Column():
268
  gr.Markdown("")
269
  tok_output = gr.Dataframe(headers=DEFAULT_TOK_ATTR, value=default_token(
270
+ DEFAULT_TEXT, DEFAULT_TOK_ATTR, DEFAULT_MODEL)[0], overflow_row_behaviour="paginate")
271
+ tok_button = gr.Button("Update Token Properties", variant="primary")
272
  with gr.Box():
273
  with gr.Column():
274
+ with gr.Row():
275
+ with gr.Column():
276
+ gr.Markdown(
277
+ "## [Word and Phrase Similarity](https://spacy.io/usage/linguistic-features#vectors-similarity)")
278
+ gr.Markdown(
279
+ "Words and spans have similarity ratings based off of their word vectors, or word embeddings")
280
+ with gr.Column():
281
+ with gr.Row():
282
+ with gr.Column():
283
+ gr.Markdown(" ")
284
+ with gr.Column():
285
+ sim_model = gr.Textbox(label="Model", value="en_core_web_md")
286
  with gr.Row():
287
  with gr.Column():
288
  sim_text1 = gr.Textbox(
 
295
  label="Similarity Score", value="0.12")
296
  with gr.Column():
297
  gr.Markdown("")
298
+ sim_random_button = gr.Button("Update random words")
299
+ sim_button = gr.Button("Update similarity", variant="primary")
300
  with gr.Box():
301
  with gr.Column():
302
+ with gr.Row():
303
+ with gr.Column():
304
+ gr.Markdown(
305
+ "## [Spans](https://spacy.io/usage/visualizers#span)")
306
+ gr.Markdown(
307
+ "The span visualizer highlights overlapping spans in a text")
308
+ with gr.Column():
309
+ with gr.Row():
310
+ with gr.Column():
311
+ gr.Markdown(" ")
312
+ with gr.Column():
313
+ span_model = gr.Textbox(label="Model", value="en_core_web_sm")
314
  with gr.Row():
315
  with gr.Column():
316
  span1 = gr.Textbox(
 
334
  with gr.Column():
335
  gr.Markdown("")
336
  span_output = gr.HTML(value=span(
337
+ DEFAULT_TEXT, "U.K. startup", "U.K.", "ORG", "GPE", DEFAULT_MODEL)[0])
338
+ span_button = gr.Button("Update Spans", variant="primary")
 
 
 
 
 
 
 
 
 
339
 
340
  model_input.change(get_text, inputs=[model_input], outputs=text_input)
341
  button.click(dependency, inputs=[
342
+ text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=[dep_output, dep_download_button, dep_model])
343
  button.click(
344
+ entity, inputs=[text_input, ent_input, model_input], outputs=[ent_output, ent_model])
345
  button.click(
346
+ token, inputs=[text_input, tok_input, model_input], outputs=[tok_output, tok_model])
347
  button.click(vectors, inputs=[sim_text1,
348
+ sim_text2, model_input], outputs=[sim_output, sim_model])
349
  button.click(
350
+ span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=[span_output, span_model])
351
  dep_button.click(dependency, inputs=[
352
+ text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=[dep_output, dep_download_button, dep_model])
353
  ent_button.click(
354
+ entity, inputs=[text_input, ent_input, model_input], outputs=[ent_output, ent_model])
355
  tok_button.click(
356
+ token, inputs=[text_input, tok_input, model_input], outputs=[tok_output, tok_model])
357
  sim_button.click(vectors, inputs=[
358
+ sim_text1, sim_text2, model_input], outputs=[sim_output, sim_model])
359
  span_button.click(
360
+ span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=[span_output, span_model])
361
  sim_random_button.click(random_vectors, inputs=[text_input, model_input], outputs=[
362
+ sim_output, sim_text1, sim_text2, sim_model])
363
  demo.launch()
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
  pandas==1.4.2
2
  gradio==3.0.18
3
- spacy==3.4.0
4
 
5
  https://huggingface.co/spacy/ca_core_news_md/resolve/main/ca_core_news_md-any-py3-none-any.whl
6
  https://huggingface.co/spacy/ca_core_news_sm/resolve/main/ca_core_news_sm-any-py3-none-any.whl
 
1
  pandas==1.4.2
2
  gradio==3.0.18
3
+ spacy==3.3.1
4
 
5
  https://huggingface.co/spacy/ca_core_news_md/resolve/main/ca_core_news_md-any-py3-none-any.whl
6
  https://huggingface.co/spacy/ca_core_news_sm/resolve/main/ca_core_news_sm-any-py3-none-any.whl