import spacy
from spacy import displacy
import random
from spacy.tokens import Span
import gradio as gr
# Model family selected by default in the dropdown. The handlers append a
# size suffix ("_sm" or "_md") to this prefix before calling spacy.load.
DEFAULT_MODEL = "en_core_web"
# Sample sentence pre-filled in the main text box.
DEFAULT_TEXT = "David Bowie moved to the US in 1974, initially staying in New York City before settling in Los Angeles."
# Token attributes used as the column headers of the token table.
DEFAULT_TOK_ATTR = ['idx', 'text', 'pos_', 'lemma_', 'shape_', 'dep_']
# Entity labels offered in the "Entity" tab. This name is referenced by the
# UI code but was not defined in the file, which raised NameError on startup.
# NOTE(review): the values are the NER labels of spaCy's English pipelines;
# confirm they match the label set the deployed models actually use.
DEFAULT_ENTS = [
    'CARDINAL', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE', 'LAW', 'LOC',
    'MONEY', 'NORP', 'ORDINAL', 'ORG', 'PERCENT', 'PERSON', 'PRODUCT',
    'QUANTITY', 'TIME', 'WORK_OF_ART',
]
def get_all_models():
    """Return the model families listed in ``requirements.txt``.

    Each requirement line is split on ``/``; the fifth segment is taken as
    the package name and reduced to its first three ``_``-separated parts
    (so ``en_core_web_sm`` and ``en_core_web_md`` both yield
    ``en_core_web``).

    Returns:
        A list of distinct family names in order of first appearance.

    Raises:
        OSError: If ``requirements.txt`` cannot be opened.
    """
    models = []
    with open("requirements.txt", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            # Comment lines may contain URLs but never declare a model.
            if line.startswith("#"):
                continue
            parts = line.split("/")
            # NOTE(review): the original substring filter was an empty
            # string (always true), so plain pins such as "gradio" crashed
            # on the index below. Lines without a fifth segment are skipped
            # instead; restore a host-specific filter if one was intended.
            if len(parts) < 5 or not parts[4]:
                continue
            model = "_".join(parts[4].split("_")[:3])
            if model not in models:
                models.append(model)
    return models
# Model families offered in the model dropdown; computed once at import time.
models = get_all_models()
def dependency(text, col_punct, col_phrase, compact, model):
    """Render the dependency parse of ``text`` with displaCy.

    Args:
        text: The sentence(s) to parse.
        col_punct: Passed to displaCy as ``collapse_punct``.
        col_phrase: Passed to displaCy as ``collapse_phrases``.
        compact: Passed to displaCy as ``compact``.
        model: Model family prefix; ``"_sm"`` is appended before loading.

    Returns:
        The value produced by ``displacy.render`` for style ``"dep"``.
    """
    pipeline = spacy.load(model + "_sm")
    parsed = pipeline(text)
    render_options = {
        "compact": compact,
        "collapse_phrases": col_phrase,
        "collapse_punct": col_punct,
    }
    return displacy.render(parsed, style="dep", options=render_options)
def entity(text, ents, model):
    """Render the named entities of ``text`` with displaCy.

    Args:
        text: The text to analyse.
        ents: Entity labels passed to displaCy as the ``ents`` option.
        model: Model family prefix; ``"_sm"`` is appended before loading.

    Returns:
        The value produced by ``displacy.render`` for style ``"ent"``.
    """
    pipeline = spacy.load(model + "_sm")
    parsed = pipeline(text)
    return displacy.render(parsed, style="ent", options={"ents": ents})
def token(text, attributes, model):
    """Collect the requested attributes for every token in ``text``.

    Args:
        text: The text to tokenize.
        attributes: Names of token attributes to read (e.g. ``"pos_"``).
        model: Model family prefix; ``"_sm"`` is appended before loading.

    Returns:
        A list with one row per token, each row holding the attribute values
        in the order given by ``attributes``.

    Raises:
        AttributeError: If a name in ``attributes`` is not a token attribute.
    """
    nlp = spacy.load(model + "_sm")
    doc = nlp(text)
    # The per-token row was previously built but never added to the result,
    # so the function always returned an empty list.
    return [[getattr(tok, attr) for attr in attributes] for tok in doc]
def vectors(text, model):
    """Pick two random pieces of ``text`` and report their similarity.

    Candidates are the document's noun chunks plus its individual tokens
    that are neither stop words nor in the excluded part-of-speech set.

    Args:
        text: The text to analyse.
        model: Model family prefix; ``"_md"`` is appended before loading.

    Returns:
        A tuple ``(score, first_text, second_text)`` where ``score`` is the
        similarity of the two picks rounded to two decimals.

    Raises:
        IndexError: If the text yields no candidates to choose from.
    """
    nlp = spacy.load(model + "_md")
    doc = nlp(text)
    n_chunks = list(doc.noun_chunks)
    # NOTE(review): the exclusion list was truncated in the file; PUNCT and
    # PROPN are a reconstruction. Confirm the intended tags.
    excluded_pos = ('PUNCT', 'PROPN')
    words = [
        tok for tok in doc
        if not tok.is_stop and tok.pos_ not in excluded_pos
    ]
    str_list = n_chunks + words
    # random.choices samples with replacement, so both picks can be the
    # same candidate.
    first, second = random.choices(str_list, k=2)
    return round(first.similarity(second), 2), first.text, second.text
def span(text, span1, span2, label1, label2, model):
    """Render two user-described spans of ``text`` with displaCy.

    Each span is given as a space-separated phrase. Its start is the index
    of a token whose text equals the phrase's first word, and its end is one
    past a token whose text equals the phrase's last word. When several
    tokens match, the last match wins; when none match, the index stays 0.

    Args:
        text: The text to analyse.
        span1: Phrase describing the first span.
        span2: Phrase describing the second span.
        label1: Label attached to the first span.
        label2: Label attached to the second span.
        model: Model family prefix; ``"_sm"`` is appended before loading.

    Returns:
        The value produced by ``displacy.render`` for style ``"span"``.
    """
    nlp = spacy.load(model + "_sm")
    doc = nlp(text)
    idx1_1 = 0
    idx1_2 = 0
    idx2_1 = 0
    idx2_2 = 0
    words1 = span1.split(" ")
    words2 = span2.split(" ")
    # Iterate the doc directly instead of rebuilding list(doc) on every step.
    for i, tok in enumerate(doc):
        if words1[0] == tok.text:
            idx1_1 = i
        if words1[-1] == tok.text:
            idx1_2 = i + 1
        if words2[0] == tok.text:
            idx2_1 = i
        if words2[-1] == tok.text:
            idx2_2 = i + 1
    # "sc" is the span group this function fills before rendering.
    doc.spans["sc"] = [
        Span(doc, idx1_1, idx1_2, label1),
        Span(doc, idx2_1, idx2_2, label2),
    ]
    html = displacy.render(doc, style="span")
    return html
# Gradio UI: one shared text box and model selector, plus one tab per
# analysis. Each tab's "Generate" button is wired to its handler below.
demo = gr.Blocks()
with demo:
    text_input = gr.Textbox(value=DEFAULT_TEXT, interactive=True)
    model_input = gr.Dropdown(
        choices=models, value=DEFAULT_MODEL, interactive=True)
    with gr.Tabs():
        with gr.TabItem("Dependency"):
            col_punct = gr.Checkbox(label="Collapse Punctuation", value=True)
            col_phrase = gr.Checkbox(label="Collapse Phrases", value=True)
            compact = gr.Checkbox(label="Compact", value=True)
            depen_output = gr.HTML()
            depen_button = gr.Button("Generate")
        with gr.TabItem("Entity"):
            entity_input = gr.CheckboxGroup(DEFAULT_ENTS, value=DEFAULT_ENTS)
            entity_output = gr.HTML()
            entity_button = gr.Button("Generate")
        with gr.TabItem("Tokens"):
            with gr.Column():
                # NOTE(review): the arguments of this call were truncated in
                # the file; they are reconstructed to mirror the entity
                # checkbox group and the table headers below.
                tok_input = gr.CheckboxGroup(
                    DEFAULT_TOK_ATTR, value=DEFAULT_TOK_ATTR)
                tok_output = gr.Dataframe(
                    headers=DEFAULT_TOK_ATTR, overflow_row_behaviour="paginate")
            tok_button = gr.Button("Generate")
        with gr.TabItem("Similarity"):
            sim_text1 = gr.Textbox(value="David Bowie", label="Chosen")
            sim_text2 = gr.Textbox(value="the US", label="Chosen")
            sim_output = gr.Textbox(value="0.09", label="Similarity Score")
            sim_button = gr.Button("Generate")
        with gr.TabItem("Spans"):
            with gr.Row():
                span1 = gr.Textbox(value="David Bowie", label="Span 1")
                label1 = gr.Textbox(value="Name",
                                    label="Label for Span 1")
            with gr.Row():
                span2 = gr.Textbox(value="David", label="Span 2")
                label2 = gr.Textbox(value="First",
                                    label="Label for Span 2")
            span_output = gr.HTML()
            span_button = gr.Button("Generate")

    # Event wiring. Input order matches each handler's parameter order, and
    # the similarity outputs match the (score, first, second) tuple returned
    # by vectors().
    depen_button.click(dependency, inputs=[
        text_input, col_punct, col_phrase, compact, model_input], outputs=depen_output)
    entity_button.click(
        entity, inputs=[text_input, entity_input, model_input], outputs=entity_output)
    tok_button.click(
        token, inputs=[text_input, tok_input, model_input], outputs=tok_output)
    sim_button.click(vectors, inputs=[text_input, model_input], outputs=[
        sim_output, sim_text1, sim_text2])
    span_button.click(
        span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=span_output)