File size: 2,440 Bytes
b8d16b2
 
65e9efa
b8d16b2
65e9efa
 
b8d16b2
65e9efa
b8d16b2
65e9efa
b8d16b2
65e9efa
b8d16b2
 
65e9efa
 
 
 
b8d16b2
 
169138c
b8d16b2
65e9efa
b8d16b2
 
65e9efa
b8d16b2
65e9efa
 
053f2f3
49c143c
169138c
 
 
 
 
65e9efa
169138c
 
 
 
b8d16b2
169138c
 
b8d16b2
 
169138c
 
 
 
 
b8d16b2
169138c
 
 
 
 
 
 
053f2f3
169138c
 
 
 
 
 
 
 
053f2f3
169138c
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import random

import spacy
import srsly
import streamlit as st

# Transformer-based English pipeline used for entity recognition.
# NOTE(review): Streamlit re-executes the script on each interaction, so this
# load and the file read below presumably repeat on every rerun unless they
# are cached - consider Streamlit's caching helpers.
nlp = spacy.load("en_core_web_trf")

# Pre-processed grant records, read from a JSON Lines file into a list.
grants = [*srsly.read_jsonl("data/processed/entities.jsonl")]

# Both location labels share one highlight colour; the label list handed to
# displaCy is derived from the colour map so the two stay in sync.
colors = dict.fromkeys(("GPE", "LOC"), "#5cff84")
options = {"ents": list(colors), "colors": colors}

# Container markup that the rendered entity HTML is formatted into.
HTML_WRAPPER = (
    '<div style="overflow-x: auto; border: 1px solid #e6e9ef; '
    'border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">'
    "{}</div>"
)


def render_entities(doc, colors: dict, options: dict) -> str:
    """
    Takes a SpaCy doc and renders its entities as a single-line HTML string.

    Args:
        doc: The object handed to ``spacy.displacy.render``.
        colors: Mapping of entity label to colour. Entries here take
            precedence over same-label entries in ``options["colors"]``.
        options: displaCy options dict. It is copied, never modified.

    Returns:
        The displaCy entity markup with every newline replaced by a space.
    """

    # Work on a copy so the caller's dict is untouched, and tolerate a
    # missing/empty options dict.
    render_options = dict(options) if options else {}
    if colors:
        # Honour the `colors` argument instead of silently ignoring it.
        base_colors = render_options.get("colors") or {}
        render_options["colors"] = {**base_colors, **colors}

    html = spacy.displacy.render(doc, style="ent", options=render_options)

    # NOTE(review): newlines are flattened presumably so the markup survives
    # Streamlit's markdown rendering as one HTML block - confirm.
    return html.replace("\n", " ")


# Page title and sidebar blurb explaining what the demo does.
# NOTE(review): the emoji were restored from mis-encoded UTF-8 text; the final
# globe glyph is a best guess because its last byte was lost - confirm.
st.header("Location Recognition Demo 🔎🌆🌍")
st.sidebar.header("Information ℹ")
st.sidebar.markdown(
    """
This example application accompanies the blog post: [Extracting useful information from documents with Named Entity Recognition]().
It uses a pre-trained Named Entity Recognition (NER) model from the [spaCy](https://spacy.io/) library to extract locations from your own examples, or a sample of grant applications from The Wellcome Trust.
The application will extract the following types of location entity:

* __GPE__: Geopolitical entities (countries, cities, states)
* __LOC__: Locations (mountains, rivers, lakes)
"""
)


def show_example(text):
    """
    Runs the NER pipeline over ``text`` and writes the highlighted entities
    to the page.

    Args:
        text: Raw text to analyse.

    Returns:
        The ``text`` argument, unchanged.
    """
    # Parse the argument here rather than relying on a module-level `doc`,
    # so the function always renders exactly the text it was given.
    doc = nlp(text)
    html = render_entities(doc, colors, options)
    st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)

    return text


# Choose the text area's initial content: a random grant when the button was
# pressed, otherwise the placeholder prompt.
if st.button("Show Wellcome example", key="text"):
    initial_text = random.choice(grants)["text"]
else:
    initial_text = "Enter your text here"

# A single text area serves both cases; only its initial value differs.
text = st.text_area(
    "Add your own text or click the button to see a Wellcome example",
    value=initial_text,
    height=200,
    help="Enter your own text and press CTRL + ENTER to search for entities",
)
show_example(text)

# Attribution and licence notice for the sample grant data.
st.markdown(
    "Examples from The Wellcome Trust are taken from data that are published openly at [360 Giving](https://data.threesixtygiving.org/). They are published under a [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/) license."
)