LocationFinder / app.py
mattupson's picture
Update app.py
3f1f888
raw
history blame contribute delete
No virus
2.81 kB
import random
import spacy
import srsly
import streamlit as st
# spaCy pipeline used to parse whatever text ends up in the input box.
# NOTE(review): Streamlit re-executes this script on each interaction, so the
# model load below presumably runs on every rerun - consider caching it.
nlp = spacy.load("en_core_web_trf")

# Pre-processed grant records, read from a JSON Lines file on disk.
grants = list(srsly.read_jsonl("data/processed/entities.jsonl"))

# Both location labels share the same highlight color.
colors = dict.fromkeys(("GPE", "LOC"), "#5cff84")
# Restrict the visualizer to the labels that have a color assigned.
options = {"ents": list(colors), "colors": colors}

# Container markup that the rendered entity HTML is formatted into.
HTML_WRAPPER = (
    '<div style="overflow-x: auto; border: 1px solid #e6e9ef; '
    'border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>'
)
def render_entities(doc, colors: dict, options: dict) -> str:
    """
    Render the entities of a spaCy doc as a single line of displaCy HTML.

    Args:
        doc: Parsed spaCy document whose entities are highlighted.
        colors: Mapping of entity label to CSS color. When non-empty it is
            applied as the ``"colors"`` entry of the render options, so the
            argument is actually honored rather than silently ignored.
        options: displaCy entity-visualizer options (for example ``"ents"``
            and ``"colors"``). The caller's dict is never modified.

    Returns:
        The rendered markup with every newline replaced by a space.
    """
    # Work on a copy so that injecting the colors does not mutate shared state.
    render_options = dict(options) if options else {}
    if colors:
        render_options["colors"] = colors
    html = spacy.displacy.render(doc, style="ent", options=render_options)
    # Newlines are flattened so the markup can be embedded in one wrapper div.
    return html.replace("\n", " ")
def show_example(text):
    """
    Write the highlighted location entities found in ``text`` to the page.

    Args:
        text: The raw text whose entities should be displayed.

    Returns:
        The same ``text`` that was passed in.
    """
    # The original body read a module-level ``doc`` and ignored ``text``.
    # Reuse that already-parsed doc only when it really corresponds to the
    # text we were asked to show; otherwise parse the text here.
    parsed = globals().get("doc")
    if parsed is None or parsed.text != text:
        parsed = nlp(text)
    html = render_entities(parsed, colors, options)
    # The markup is raw HTML, so Streamlit must be told not to escape it.
    st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)
    return text
# Page title. The trailing emoji had been mis-encoded (their UTF-8 bytes were
# shown as single-byte characters) and are restored here; the last glyph had
# lost a byte, so the globe variant is a best guess.
st.header("Location Recognition Demo 🔎🌆🌍")
st.sidebar.header("Information ℹ️ ")
# Sidebar description. Blank lines separate the paragraphs and the bullet list
# so that Markdown does not merge them (in particular, the closing caveat
# would otherwise be absorbed into the last bullet as a continuation line).
st.sidebar.markdown(
    """
This example application accompanies the blog post: [Extracting useful information from documents with Named Entity Recognition](https://medium.com/@reproducible/extracting-useful-information-from-documents-with-named-entity-recognition-4e009b60a8c).

It uses a pre-trained Named Entity Recognition (NER) model from the [spaCy](https://spacy.io/) library to extract locations from your own examples, or a sample of grant applications from The Wellcome Trust.

The application will extract the following types of location entity:

* __GPE__: Geopolitical entities (countries, cities, states)
* __LOC__: Locations (mountains, rivers, lakes)

This model will inevitably make some mistakes; it was trained on a large generic corpus of text, and the Wellcome Trust grant applications come from a very specific domain. We could improve this model by fine-tuning it on data from this domain.
"""
)
# Pick the initial contents of the text box: a random grant when the button
# was pressed on this run, otherwise a placeholder prompt.
if st.button("Show Wellcome example", key="text"):
    initial_text = random.choice(grants)["text"]
else:
    initial_text = "Enter your text here"

# A single text area serves both cases; only its starting value differs.
text = st.text_area(
    "Add your own text or click the button to see a Wellcome example",
    value=initial_text,
    height=200,
    help="Enter your own text and press CTRL + ENTER to search for entities",
)

# Parse the current contents and display the highlighted entities.
doc = nlp(text)
show_example(text)
# Data attribution and license notice shown below the example.
st.markdown(
    "Examples from The Wellcome Trust are taken from data that are published openly at [360 Giving](https://data.threesixtygiving.org/). They are published under a [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/) license."
)