ner / app.py
Shivam098's picture
Update app.py
0270794
raw
history blame
1.74 kB
import streamlit as st
import spacy
from spacy import displacy
import json
# Build the spaCy pipeline; the built-in "ner" component is disabled at load.
# NOTE(review): Streamlit re-executes this script on each interaction, so this
# setup presumably runs every time -- consider caching it if startup is slow.
nlp = spacy.load("en_core_web_md", disable=["ner"])

# Sample data: concept label -> example terms for that concept.
data = {
    "fruit": ["apple", "pear", "orange"],
    "vegetable": ["broccoli", "spinach", "tomato"],
    "meat": ["beef", "pork", "turkey", "duck"],
}

# Settings handed to the "concise_concepts" pipeline factory.
concept_config = {
    "data": data,
    "ent_score": True,  # Entity Scoring section
    "verbose": True,
    "exclude_pos": ["VERB", "AUX"],
    "exclude_dep": ["DOBJ", "PCOMP"],
    "include_compound_words": False,
    "json_path": "./fruitful_patterns.json",
    "topn": (100, 500, 300),
}

nlp.add_pipe("concise_concepts", config=concept_config)
# Streamlit app: read free text, run it through the pipeline, and show the
# detected entities both as highlighted text and as JSON.
st.title('Named Entity Recognition with spaCy')
user_input = st.text_area("Enter text:", "")

if st.button("Process"):
    # Treat whitespace-only input as empty so there is always something to show.
    if user_input.strip():
        # Process the text
        doc = nlp(user_input)

        # Visualization options: one colour per concept label, and restrict
        # the rendering to those labels.
        options = {
            "colors": {"fruit": "darkorange", "vegetable": "limegreen", "meat": "salmon"},
            "ents": ["fruit", "vegetable", "meat"],
        }

        # JSON serialization with only entity and type
        result_dict = {
            "entities": [
                {"entity": ent.text, "type": ent.label_} for ent in doc.ents
            ]
        }
        result_json = json.dumps(result_dict, indent=4)

        # Display results
        st.subheader("Named Entities")
        # `options` must be passed explicitly; otherwise displaCy ignores the
        # colours and label filter defined above.
        html = displacy.render(
            doc, style="ent", options=options, page=True, minify=True
        )
        st.write(html, unsafe_allow_html=True)

        st.subheader("Entities in JSON format")
        st.json(result_json)
    else:
        # Give feedback instead of silently doing nothing on empty input.
        st.warning("Please enter some text to process.")