# Spaces status: Runtime error (page-header text captured with the source; not part of the program)
import spacy | |
from spacy import displacy | |
import random | |
from spacy.tokens import Span | |
import gradio as gr | |
# Name of the spaCy pipeline loaded into ``nlp`` below.
DEFAULT_MODEL = "en_core_web_sm"

# Sentence pre-filled in the input textbox.
DEFAULT_TEXT = "David Bowie moved to the US in 1974, initially staying in New York City before settling in Los Angeles."

# Token attribute names offered (and pre-selected) on the "Tokens" tab.
DEFAULT_TOK_ATTR = ['idx', 'text', 'pos_', 'lemma_', 'shape_', 'dep_']

# Entity labels offered (and pre-selected) on the "Entity" tab.
DEFAULT_ENTS = [
    'CARDINAL', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE', 'LAW', 'LOC',
    'MONEY', 'NORP', 'ORDINAL', 'ORG', 'PERCENT', 'PERSON', 'PRODUCT',
    'QUANTITY', 'TIME', 'WORK_OF_ART',
]

# Both pipelines are loaded once at import time and shared by every callback.
# ``nlp`` is loaded through DEFAULT_MODEL so the constant and the loaded
# pipeline cannot drift apart.
nlp = spacy.load(DEFAULT_MODEL)
# ``nlp2`` is the pipeline used by ``vectors`` for similarity scoring.
nlp2 = spacy.load("en_core_web_md")
def dependency(text, col_punct, col_phrase, compact):
    """Render the dependency parse of *text* with displaCy.

    Args:
        text: Raw text to parse with the ``nlp`` pipeline.
        col_punct: Value passed as displaCy's ``collapse_punct`` option.
        col_phrase: Value passed as displaCy's ``collapse_phrases`` option.
        compact: Value passed as displaCy's ``compact`` option.

    Returns:
        Whatever ``displacy.render`` produces for ``style="dep"``.
    """
    render_options = {
        "compact": compact,
        "collapse_phrases": col_phrase,
        "collapse_punct": col_punct,
    }
    parsed = nlp(text)
    return displacy.render(parsed, style="dep", options=render_options)
def entity(text, ents):
    """Render the named entities of *text* with displaCy.

    Args:
        text: Raw text to parse with the ``nlp`` pipeline.
        ents: Entity labels passed as displaCy's ``ents`` option.

    Returns:
        Whatever ``displacy.render`` produces for ``style="ent"``.
    """
    parsed = nlp(text)
    return displacy.render(parsed, style="ent", options={"ents": ents})
def text(default):
    """Return *default* when it is truthy; otherwise return ``None``."""
    return default if default else None
def token(text, attributes):
    """Build a table of token attributes for *text*.

    Args:
        text: Raw text to parse with the ``nlp`` pipeline.
        attributes: Attribute names to read from each token via ``getattr``.

    Returns:
        A list with one row per token; each row holds the requested
        attribute values in the order given by *attributes*.
    """
    return [[getattr(tok, name) for name in attributes] for tok in nlp(text)]
def vectors(text):
    """Score the similarity of two randomly chosen pieces of *text*.

    Candidates are the noun chunks of the parsed text plus every token that
    is not a stop word and whose ``pos_`` is neither ``PUNCT`` nor ``PROPN``.

    Args:
        text: Raw text to parse with the ``nlp2`` pipeline.

    Returns:
        A tuple ``(score, first_text, second_text)`` where ``score`` is the
        similarity rounded to two decimals. When the text yields no
        candidates at all, ``(0.0, "", "")`` is returned instead of raising.
    """
    doc = nlp2(text)
    candidates = list(doc.noun_chunks)
    candidates.extend(
        tok
        for tok in doc
        if not tok.is_stop and tok.pos_ not in ("PUNCT", "PROPN")
    )

    if not candidates:
        # Nothing to compare (e.g. empty input); give the UI a neutral result.
        return 0.0, "", ""

    if len(candidates) == 1:
        # Only one candidate exists, so it can only be compared with itself.
        first = second = candidates[0]
    else:
        # Sample without replacement so the same candidate is never compared
        # with itself, which would always report a trivial perfect score.
        first, second = random.sample(candidates, 2)

    return round(first.similarity(second), 2), first.text, second.text
def _token_bounds(doc, words):
    """Return ``(start, end)`` token indices for the span described by *words*.

    The span starts at the first token whose text equals ``words[0]`` and
    ends just after the next token (at or after the start) whose text equals
    ``words[-1]``. ``(0, 0)`` is returned when *words* is empty or no such
    pair of tokens exists, giving an empty span at the start of the document.
    """
    if not words:
        return 0, 0
    first_word, last_word = words[0], words[-1]
    start = None
    for tok in doc:
        if start is None and tok.text == first_word:
            start = tok.i
        if start is not None and tok.text == last_word:
            # ``Span`` takes an exclusive end, so step one past this token.
            return start, tok.i + 1
    return 0, 0


def span(text, span1, span2, label1, label2):
    """Render two labelled spans of *text* with displaCy's span style.

    Args:
        text: Raw text to parse with the ``nlp`` pipeline.
        span1: Words of the first span; only ``span1[0]`` and ``span1[-1]``
            are compared with token texts. Assumes a sequence of token
            texts -- TODO confirm with the caller; a plain ``str`` would be
            indexed by character.
        span2: Words of the second span, interpreted like *span1*.
        label1: Label given to the first span.
        label2: Label given to the second span.

    Returns:
        Whatever ``displacy.render`` produces for ``style="span"``.
    """
    doc = nlp(text)
    # Span boundaries are token indices (``Token.i``), not the character
    # offsets stored in ``Token.idx``.
    start1, end1 = _token_bounds(doc, span1)
    start2, end2 = _token_bounds(doc, span2)
    doc.spans["sc"] = [
        Span(doc, start1, end1, label1),
        Span(doc, start2, end2, label2),
    ]
    return displacy.render(doc, style="span")
# Noun-chunk texts of the default sentence, computed once at import time.
list_chunks = [chunk.text for chunk in nlp(DEFAULT_TEXT).noun_chunks]

# Build the UI: one shared text input and one tab per analysis.
with gr.Blocks() as demo:
    # gr.Markdown("Input text here!")
    text_input = gr.Textbox(value=DEFAULT_TEXT, interactive=True)

    with gr.Tabs():
        with gr.TabItem("Dependency"):
            col_punct = gr.Checkbox(label="Collapse Punctuation", value=True)
            col_phrase = gr.Checkbox(label="Collapse Phrases", value=True)
            compact = gr.Checkbox(label="Compact", value=True)
            depen_output = gr.HTML()
            depen_button = gr.Button("Generate")

        with gr.TabItem("Entity"):
            entity_input = gr.CheckboxGroup(DEFAULT_ENTS, value=DEFAULT_ENTS)
            entity_output = gr.HTML()
            entity_button = gr.Button("Generate")

        with gr.TabItem("Tokens"):
            tok_input = gr.CheckboxGroup(DEFAULT_TOK_ATTR, value=DEFAULT_TOK_ATTR)
            tok_output = gr.Dataframe()
            tok_button = gr.Button("Generate")

        with gr.TabItem("Similarity"):
            sim_text1 = gr.Textbox(label="Chosen")
            sim_text2 = gr.Textbox(label="Chosen")
            sim_output = gr.Textbox(label="Similarity Score")
            sim_button = gr.Button("Generate")

    # Connect each tab's button to its callback. The listeners are registered
    # while the Blocks context is still open.
    depen_button.click(
        dependency,
        inputs=[text_input, col_punct, col_phrase, compact],
        outputs=depen_output,
    )
    entity_button.click(
        entity,
        inputs=[text_input, entity_input],
        outputs=entity_output,
    )
    tok_button.click(
        token,
        inputs=[text_input, tok_input],
        outputs=tok_output,
    )
    sim_button.click(
        vectors,
        inputs=[text_input],
        outputs=[sim_output, sim_text1, sim_text2],
    )

demo.launch()