Victoria Slocum committed on
Commit
c0dee52
1 Parent(s): 09414a9

Feat:Spans

Browse files
Files changed (1) hide show
  1. app.py +84 -60
app.py CHANGED
@@ -7,72 +7,83 @@ import gradio as gr
7
  DEFAULT_MODEL = "en_core_web_sm"
8
  DEFAULT_TEXT = "David Bowie moved to the US in 1974, initially staying in New York City before settling in Los Angeles."
9
  DEFAULT_TOK_ATTR = ['idx', 'text', 'pos_', 'lemma_', 'shape_', 'dep_']
10
- DEFAULT_ENTS = ['CARDINAL', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE', 'LAW', 'LOC', 'MONEY', 'NORP', 'ORDINAL', 'ORG', 'PERCENT', 'PERSON', 'PRODUCT', 'QUANTITY', 'TIME', 'WORK_OF_ART']
 
11
 
12
  nlp = spacy.load("en_core_web_sm")
13
  nlp2 = spacy.load("en_core_web_md")
14
 
 
15
  def dependency(text, col_punct, col_phrase, compact):
16
- doc = nlp(text)
17
- options = {"compact": compact, "collapse_phrases": col_phrase, "collapse_punct": col_punct}
18
- html = displacy.render(doc, style="dep", options=options)
19
- return html
 
 
20
 
21
  def entity(text, ents):
22
- doc = nlp(text)
23
- options = {"ents": ents}
24
- html = displacy.render(doc, style="ent", options=options)
25
- return html
26
 
27
- def text(default):
28
- if default:
29
- return default
30
 
31
- def token(text, attributes):
32
- data = []
33
- doc = nlp(text)
34
- for tok in doc:
35
- tok_data = []
36
- for attr in attributes:
37
- tok_data.append(getattr(tok, attr))
38
- data.append(tok_data)
39
- return data
40
 
41
- def vectors(text):
42
- doc = nlp2(text)
43
- n_chunks = [chunk for chunk in doc.noun_chunks]
44
- words = [tok for tok in doc if not tok.is_stop and tok.pos_ not in ['PUNCT', "PROPN"]]
45
- str_list = n_chunks + words
46
- choice = random.choices(str_list, k=2)
47
- return round(choice[0].similarity(choice[1]), 2), choice[0].text, choice[1].text
48
 
49
- def span(text, span1, span2, label1, label2):
50
- doc = nlp(text)
51
- idx1_1 = 0
52
- idx1_2 = 0
53
- idx2_1 = 0
54
- idx2_2 = 0
 
 
 
55
 
56
- for tok in doc:
57
- if span1[0] == tok.text:
58
- idx1_1 = tok.idx
59
- if span1[-1] == tok.text:
60
- idx1_2 = tok.idx
61
- if span2[0] == tok.text:
62
- idx2_1 = tok.idx
63
- if span2[-1] == tok.text:
64
- idx2_2 = tok.idx
65
 
 
 
 
 
 
 
 
 
66
 
67
- doc.spans["sc"] = [
68
- Span(doc, idx1_1, idx1_2, label1),
69
- Span(doc, idx2_1, idx2_2, label2),
70
- ]
71
 
72
- html = displacy.render(doc, style="span")
73
- return html
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
- list_chunks = [chunk.text for chunk in nlp(DEFAULT_TEXT).noun_chunks]
76
 
77
  demo = gr.Blocks()
78
 
@@ -82,7 +93,7 @@ with demo:
82
  with gr.Tabs():
83
  with gr.TabItem("Dependency"):
84
  col_punct = gr.Checkbox(label="Collapse Punctuation", value=True)
85
- col_phrase = gr.Checkbox(label="Collapse Phrases", value=True)
86
  compact = gr.Checkbox(label="Compact", value=True)
87
  depen_output = gr.HTML()
88
  depen_button = gr.Button("Generate")
@@ -91,18 +102,31 @@ with demo:
91
  entity_output = gr.HTML()
92
  entity_button = gr.Button("Generate")
93
  with gr.TabItem("Tokens"):
94
- tok_input = gr.CheckboxGroup(DEFAULT_TOK_ATTR, value=DEFAULT_TOK_ATTR)
 
95
  tok_output = gr.Dataframe()
96
  tok_button = gr.Button("Generate")
97
  with gr.TabItem("Similarity"):
98
- sim_text1 = gr.Textbox(label="Chosen")
99
- sim_text2 = gr.Textbox(label="Chosen")
100
- sim_output = gr.Textbox(label="Similarity Score")
101
  sim_button = gr.Button("Generate")
102
-
103
- depen_button.click(dependency, inputs=[text_input, col_punct, col_phrase, compact], outputs=depen_output)
104
- entity_button.click(entity, inputs=[text_input, entity_input], outputs=entity_output)
 
 
 
 
 
 
 
 
 
105
  tok_button.click(token, inputs=[text_input, tok_input], outputs=tok_output)
106
- sim_button.click(vectors, inputs=[text_input], outputs=[sim_output, sim_text1, sim_text2])
 
 
 
107
 
108
- demo.launch()
 
7
# Name of the small English pipeline used for parsing, entities, tokens and spans.
DEFAULT_MODEL = "en_core_web_sm"
# Sample sentence used as the demo's default input.
DEFAULT_TEXT = "David Bowie moved to the US in 1974, initially staying in New York City before settling in Los Angeles."
# Token attributes offered (and pre-selected) in the "Tokens" tab.
DEFAULT_TOK_ATTR = ['idx', 'text', 'pos_', 'lemma_', 'shape_', 'dep_']
# Entity labels that can be passed to the displaCy entity visualizer.
DEFAULT_ENTS = [
    'CARDINAL', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE', 'LAW', 'LOC', 'MONEY',
    'NORP', 'ORDINAL', 'ORG', 'PERCENT', 'PERSON', 'PRODUCT', 'QUANTITY', 'TIME',
    'WORK_OF_ART',
]

# Load through DEFAULT_MODEL so the constant is the single place to change the pipeline.
nlp = spacy.load(DEFAULT_MODEL)
# Second pipeline, used by vectors() for similarity scoring.
nlp2 = spacy.load("en_core_web_md")
15
 
16
+
17
def dependency(text, col_punct, col_phrase, compact):
    """Render the dependency parse of ``text`` with displaCy.

    Args:
        text: Raw text to run through the ``nlp`` pipeline.
        col_punct: Value passed as displaCy's ``collapse_punct`` option.
        col_phrase: Value passed as displaCy's ``collapse_phrases`` option.
        compact: Value passed as displaCy's ``compact`` option.

    Returns:
        Whatever ``displacy.render`` returns for ``style="dep"``.
    """
    render_options = {
        "compact": compact,
        "collapse_phrases": col_phrase,
        "collapse_punct": col_punct,
    }
    parsed = nlp(text)
    return displacy.render(parsed, style="dep", options=render_options)
23
+
24
 
25
def entity(text, ents):
    """Render the named entities of ``text`` with displaCy.

    Args:
        text: Raw text to run through the ``nlp`` pipeline.
        ents: Entity labels forwarded as displaCy's ``ents`` option.

    Returns:
        Whatever ``displacy.render`` returns for ``style="ent"``.
    """
    parsed = nlp(text)
    return displacy.render(parsed, style="ent", options={"ents": ents})
30
 
 
 
 
31
 
32
def text(default):
    """Return ``default`` when it is truthy; otherwise return ``None``."""
    return default or None
 
 
 
 
 
 
35
 
 
 
 
 
 
 
 
36
 
37
def token(text, attributes):
    """Collect the requested attributes for every token in ``text``.

    Args:
        text: Raw text to run through the ``nlp`` pipeline.
        attributes: Attribute names looked up on each token via ``getattr``.

    Returns:
        A list with one row per token; each row holds the attribute values
        in the same order as ``attributes``.
    """
    return [
        [getattr(tok, name) for name in attributes]
        for tok in nlp(text)
    ]
46
 
 
 
 
 
 
 
 
 
 
47
 
48
def vectors(text):
    """Score the similarity of two randomly chosen pieces of ``text``.

    Candidates are the noun chunks of the ``nlp2`` parse followed by every
    token that is neither a stop word nor tagged ``PUNCT``/``PROPN``. Two
    candidates are drawn with ``random.choices`` (i.e. with replacement, so
    the same candidate can be drawn twice).

    Args:
        text: Raw text to run through the ``nlp2`` pipeline.

    Returns:
        A tuple ``(score, first_text, second_text)`` where ``score`` is the
        similarity of the two picks rounded to two decimals.
    """
    parsed = nlp2(text)
    excluded_pos = ('PUNCT', "PROPN")
    candidates = list(parsed.noun_chunks)
    candidates += [
        tok for tok in parsed
        if not tok.is_stop and tok.pos_ not in excluded_pos
    ]
    first, second = random.choices(candidates, k=2)
    score = round(first.similarity(second), 2)
    return score, first.text, second.text
56
 
 
 
 
 
57
 
58
def _find_token_span(doc_words, query_words):
    """Return ``(start, end)`` of the first contiguous match of ``query_words``.

    ``end`` is exclusive. Returns ``None`` when ``query_words`` is empty or
    does not occur as a contiguous run inside ``doc_words``.
    """
    width = len(query_words)
    if width == 0:
        return None
    for start in range(len(doc_words) - width + 1):
        if doc_words[start:start + width] == query_words:
            return start, start + width
    return None


def span(text, span1, span2, label1, label2):
    """Highlight up to two labelled phrases of ``text`` with displaCy.

    Each phrase is located as a contiguous run of tokens in the parsed text
    (first occurrence wins). A phrase that is empty or cannot be found is
    left out instead of producing an empty or inverted span.

    Args:
        text: Raw text to run through the ``nlp`` pipeline.
        span1: Phrase to mark as the first span.
        span2: Phrase to mark as the second span.
        label1: Label attached to the first span.
        label2: Label attached to the second span.

    Returns:
        Whatever ``displacy.render`` returns for ``style="span"``.
    """
    doc = nlp(text)
    doc_words = [tok.text for tok in doc]

    found = []
    for phrase, label in ((span1, label1), (span2, label2)):
        # Tokenize the phrase with the pipeline's own tokenizer so punctuation
        # splits the same way as in the document ("1974," -> "1974", ",").
        phrase_doc = nlp.make_doc((phrase or "").strip())
        query_words = [tok.text for tok in phrase_doc if not tok.is_space]
        bounds = _find_token_span(doc_words, query_words)
        if bounds is not None:
            start, end = bounds
            found.append(Span(doc, start, end, label))

    # "sc" is the spans key the original code stores under before rendering.
    doc.spans["sc"] = found
    return displacy.render(doc, style="span")
86
 
 
87
 
88
  demo = gr.Blocks()
89
 
 
93
  with gr.Tabs():
94
  with gr.TabItem("Dependency"):
95
  col_punct = gr.Checkbox(label="Collapse Punctuation", value=True)
96
+ col_phrase = gr.Checkbox(label="Collapse Phrases", value=True)
97
  compact = gr.Checkbox(label="Compact", value=True)
98
  depen_output = gr.HTML()
99
  depen_button = gr.Button("Generate")
 
102
  entity_output = gr.HTML()
103
  entity_button = gr.Button("Generate")
104
  with gr.TabItem("Tokens"):
105
+ tok_input = gr.CheckboxGroup(
106
+ DEFAULT_TOK_ATTR, value=DEFAULT_TOK_ATTR)
107
  tok_output = gr.Dataframe()
108
  tok_button = gr.Button("Generate")
109
  with gr.TabItem("Similarity"):
110
+ sim_text1 = gr.Textbox(value="David Bowie", label="Chosen")
111
+ sim_text2 = gr.Textbox(value="the US", label="Chosen")
112
+ sim_output = gr.Textbox(value="0.09", label="Similarity Score")
113
  sim_button = gr.Button("Generate")
114
+ with gr.TabItem("Spans"):
115
+ span1 = gr.Textbox(value="David Bowie", label="Span 1")
116
+ label1 = gr.Textbox(value="Full Name", label="Label for Span 1")
117
+ span2 = gr.Textbox(value="David", label="Span 2")
118
+ label2 = gr.Textbox(value="First Name", label="Label for Span 2")
119
+ span_output = gr.HTML()
120
+ span_button = gr.Button("Generate")
121
+
122
+ depen_button.click(dependency, inputs=[
123
+ text_input, col_punct, col_phrase, compact], outputs=depen_output)
124
+ entity_button.click(
125
+ entity, inputs=[text_input, entity_input], outputs=entity_output)
126
  tok_button.click(token, inputs=[text_input, tok_input], outputs=tok_output)
127
+ sim_button.click(vectors, inputs=[text_input], outputs=[
128
+ sim_output, sim_text1, sim_text2])
129
+ span_button.click(
130
+ span, inputs=[text_input, span1, span2, label1, label2], outputs=span_output)
131
 
132
+ demo.launch()