|
import streamlit as st |
|
import spacy |
|
from spacy import displacy |
|
import json |
|
|
|
|
|
|
|
# Seed terms for each concept label handed to the "concise_concepts" component.
data = {
    "fruit": ["apple", "pear", "orange"],
    "vegetable": ["broccoli", "spinach", "tomato"],
    "meat": ['beef', 'pork', 'turkey', 'duck'],
}


@st.cache_resource
def _load_pipeline():
    """Build the spaCy pipeline once and reuse it across Streamlit reruns.

    Streamlit re-executes this script from the top on every widget
    interaction. Without caching, the model would be reloaded and the
    "concise_concepts" component re-added on every rerun.

    Returns:
        The "en_core_web_md" pipeline, loaded with its "ner" component
        disabled and with "concise_concepts" added.
    """
    pipeline = spacy.load("en_core_web_md", disable=["ner"])
    pipeline.add_pipe(
        "concise_concepts",
        config={
            "data": data,
            "ent_score": True,
            "verbose": True,
            "exclude_pos": ["VERB", "AUX"],
            "exclude_dep": ["DOBJ", "PCOMP"],
            "include_compound_words": False,
            "json_path": "./fruitful_patterns.json",
            # NOTE(review): presumably one limit per label, in the order the
            # labels appear in `data` -- confirm against the concise_concepts
            # documentation.
            "topn": (100, 500, 300),
        },
    )
    return pipeline


nlp = _load_pipeline()
|
|
|
# Streamlit front end: read free text, run it through `nlp`, then show the
# recognised entities as a displaCy rendering and as JSON.
st.title('Named Entity Recognition with spaCy')

user_input = st.text_area("Enter text:", "")

if st.button("Process"):
    if user_input:
        doc = nlp(user_input)

        # displaCy settings: which labels to show and the colour for each.
        options = {
            "colors": {"fruit": "darkorange", "vegetable": "limegreen", "meat": "salmon"},
            "ents": ["fruit", "vegetable", "meat"],
        }

        # One {'entity', 'type'} record per entity found in the document.
        result_dict = {
            'entities': [
                {'entity': ent.text, 'type': ent.label_} for ent in doc.ents
            ]
        }
        result_json = json.dumps(result_dict, indent=4)

        st.subheader("Named Entities")
        # `options` must be passed explicitly; otherwise the colours and
        # label filter defined above are never applied.
        # NOTE(review): page=True emits a full HTML document; a fragment
        # (page=False) is likely sufficient when embedding -- confirm rendering.
        html = displacy.render(
            doc, style="ent", page=True, minify=True, options=options
        )
        st.write(html, unsafe_allow_html=True)

        st.subheader("Entities in JSON format")
        st.json(result_json)
    else:
        # Give feedback instead of silently doing nothing on empty input.
        st.warning("Please enter some text to process.")
|
|