import random

import spacy
import srsly
import streamlit as st

# Streamlit re-executes this script from top to bottom on every user
# interaction, so expensive loads must be cached or they are repeated on each
# rerun. Newer Streamlit releases provide ``st.cache_resource``; older ones
# only have ``st.cache``, so use whichever this installation exposes.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is None:
    _cache_resource = st.cache(allow_output_mutation=True)


@_cache_resource
def _load_model():
    """Load the ``en_core_web_trf`` spaCy pipeline once and reuse it across reruns."""
    return spacy.load("en_core_web_trf")


@_cache_resource
def _load_grants():
    """Read the pre-processed grants from disk once and reuse them across reruns."""
    return list(srsly.read_jsonl("data/processed/entities.jsonl"))


nlp = _load_model()

# Load pre-processed grants from disk.

grants = _load_grants()

# Both location labels share a single highlight colour.
_HIGHLIGHT = "#5cff84"
colors = dict.fromkeys(("GPE", "LOC"), _HIGHLIGHT)

# displaCy options: the labels to display and the colour to use for each.
options = {"ents": list(colors), "colors": colors}

# Container markup placed around the rendered entities; ``{}`` receives the HTML.
HTML_WRAPPER = (
    '<div style="overflow-x: auto; border: 1px solid #e6e9ef; '
    'border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">'
    "{}</div>"
)


def render_entities(doc, colors: dict, options: dict) -> str:
    """
    Takes a SpaCy doc and renders its entities as displaCy HTML.

    Args:
        doc: The processed spaCy doc whose entities should be rendered.
        colors: Mapping of entity label to highlight colour. Applied when
            ``options`` does not already contain a ``"colors"`` entry.
        options: displaCy options for the ``ent`` style. The caller's dict is
            copied, never modified.

    Returns:
        The rendered HTML with every newline replaced by a space.
    """
    # Import the submodule explicitly rather than relying on ``spacy.displacy``
    # having been loaded as a side effect of ``import spacy``.
    from spacy import displacy

    # ``colors`` was previously accepted but never used. Honour it without
    # overriding an explicit "colors" entry, so existing callers whose options
    # already carry their colours see exactly the same output as before.
    render_options = dict(options or {})
    if colors:
        render_options.setdefault("colors", colors)

    html = displacy.render(doc, style="ent", options=render_options)

    return html.replace("\n", " ")


def show_example(text):
    """
    Runs the NER model over ``text`` and writes the highlighted result to the page.

    The text is processed with the module-level ``nlp`` pipeline, rendered by
    ``render_entities`` using the module-level ``colors`` and ``options``,
    wrapped in ``HTML_WRAPPER`` and written to the Streamlit page.

    Args:
        text: The raw text to analyse.

    Returns:
        The ``text`` argument, unchanged.
    """

    # Build the doc from the argument instead of reading a ``doc`` global that
    # the caller had to set beforehand, so the rendered entities always belong
    # to the text that was passed in.
    doc = nlp(text)
    html = render_entities(doc, colors, options)
    st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)

    return text


st.header("Location Recognition Demo 🔎🌆🌍")
st.sidebar.header("Information ℹ️ ")
st.sidebar.markdown(
    """
This example application accompanies the blog post: [Extracting useful information from documents with Named Entity Recognition](https://medium.com/@reproducible/extracting-useful-information-from-documents-with-named-entity-recognition-4e009b60a8c).
It uses a pre-trained Named Entity Recognition (NER) model from the [spaCy](https://spacy.io/) library to extract locations from your own examples, or a sample of grant applications from The Wellcome Trust.
The application will extract the following types of location entity:

* __GPE__: Geopolitical entities (countries, cities, states)
* __LOC__: Locations (mountains, rivers, lakes)

This model will innevitably make some mistakes; it was trained on a large generic corpus of text, and the Wellcome Trust grant applications come from a very specific domain. We could improve this model by fine-tuning it on data from this domain.
"""
)

# Clicking the button seeds the text area with a randomly chosen grant;
# otherwise a placeholder is shown. Only the initial value differs between the
# two cases, so a single text area serves both.
# NOTE(review): because the initial value depends on the button state, edits
# made to a loaded Wellcome example appear to be discarded on the next rerun
# (the button reads False again) - confirm, and consider st.session_state.
if st.button("Show Wellcome example", key="text"):
    default_text = random.choice(grants)["text"]
else:
    default_text = "Enter your text here"

text = st.text_area(
    "Add your own text or click the button to see a Wellcome example",
    value=default_text,
    height=200,
    help="Enter your own text and press CTRL + ENTER to search for entities",
)
show_example(text)

# Attribution and licence notice for the sample data, shown below the demo.
_DATA_ATTRIBUTION = (
    "Examples from The Wellcome Trust are taken from data that are publishes "
    "openly at [360 Giving](https://data.threesixtygiving.org/). "
    "They are published under a "
    "[CC BY 4.0](https://creativecommons.org/licenses/by/4.0/) license."
)
st.markdown(_DATA_ATTRIBUTION)