|
|
|
|
|
|
|
|
import requests |
|
|
import spacy_udpipe |
|
|
import streamlit as st |
|
|
from spacy import displacy |
|
|
|
|
|
|
|
|
@st.cache_resource
def _load_pipeline():
    """Build the Hebrew NER pipeline once and reuse it across script reruns.

    Streamlit re-executes the whole script on every user interaction, so the
    model download and load are wrapped in ``st.cache_resource`` to avoid
    repeating them each time.

    Returns:
        The spaCy-UDPipe Hebrew pipeline with the ``span_marker`` component
        added.
    """
    # Fetch the Hebrew UDPipe model, then load it as a spaCy pipeline.
    spacy_udpipe.download("he")
    pipeline = spacy_udpipe.load("he")
    # Add the SpanMarker component that performs the entity recognition.
    pipeline.add_pipe(
        "span_marker",
        config={"model": "iahlt/span-marker-alephbert-small-nemo-mt-he"},
    )
    return pipeline


# Module-level pipeline used by the script body below.
nlp = _load_pipeline()
|
|
|
|
|
|
|
|
def get_html(html: str) -> str:
    """Wrap rendered entity markup in a styled, right-to-left container.

    Args:
        html: HTML markup to embed (e.g. the output of ``displacy.render``).

    Returns:
        A single-line HTML string: a ``<style>`` rule for ``mark.entity``
        followed by a bordered, horizontally scrollable ``<div>`` holding
        the markup.
    """
    # ``direction: rtl`` must live inside the style attribute; the closing
    # quote previously came before it, so the declaration was ignored.
    wrapper = (
        '<div style="overflow-x: auto; border: 1px solid #e6e9ef; '
        'border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; '
        'direction: rtl">{}</div>'
    )
    # Flatten to one line -- presumably so the markdown renderer does not
    # break the markup apart at line boundaries.
    html = html.replace("\n", " ")
    style = "<style>mark.entity { display: inline-block }</style>"
    return f"{style}{wrapper.format(html)}"
|
|
|
|
|
|
|
|
def page_init():
    """Render the page heading of the demo application."""
    title = "Named Entity Recognition Demo"
    st.header(title)
|
|
|
|
|
|
|
|
@st.cache_data
def get_html_from_server(text):
    """Annotate ``text`` with the remote IAHLT NER service and return HTML.

    The result is cached by Streamlit (``st.cache_data``), so repeated calls
    with the same text do not hit the service again.

    Args:
        text: The text to annotate.

    Returns:
        The wrapped HTML produced by ``get_html`` for the displaCy entity
        rendering of the service's answer.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    api_url = "https://ne-api.iahlt.org/api/hebrew/ner/"

    def get_entities(text):
        """Query the service and keep only real entities (label other than "O")."""
        text = text.strip()
        if not text:
            return []
        # Pass the text via ``params`` so that characters such as "&", "#",
        # "+" or "%" are URL-encoded instead of corrupting the query string.
        response = requests.get(api_url, params={"text": text}, timeout=30)
        response.raise_for_status()
        answer = response.json()
        # Reduce each entity to the start/end/label keys and drop the
        # "O" (outside) group.
        answer["ents"] = [
            {
                "start": ent["start"],
                "end": ent["end"],
                "label": ent["entity_group"],
            }
            for ent in answer["ents"]
            if ent["entity_group"] != "O"
        ]
        return answer

    def render_entities(text):
        """Render the service's entities with displaCy in manual mode."""
        entities = get_entities(text)
        html = displacy.render(
            entities,
            style="ent",
            options={"direction": "rtl"},
            manual=True,
        )
        # Force any remaining "ltr" in the generated markup to "rtl".
        return html.replace("ltr", "rtl")

    return get_html(render_entities(text))
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script body: draw the page, read the user's text and, on request,
    # show the local model's entities next to the IAHLT service's entities.
    page_init()

    # Hebrew sample sentence pre-filled in the text area (restored from a
    # mis-decoded, unreadable form).
    sample_text = "יו\"ר ועדת הנוער נתן סלובטיק אמר שהשחקנים של אנחנו לא משתלבים באירופה."

    text = st.text_area("Text", sample_text, height=200, max_chars=1000)
    btn = st.button("Annotate")

    # Page-wide CSS: right-to-left text in the David Libre font, plus hiding
    # Streamlit's main menu and footer. ``color`` replaces the non-existent
    # ``font-color`` property, and the font family is spelled with a space
    # ("David Libre"); the "+" form is only valid inside the stylesheet URL.
    style = """
    <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=David+Libre">
    <style>
        .stTextArea textarea {
            font-size: 20px;
            color: black;
            font-family: 'David Libre';
            direction: rtl;
        }
        .entities {
            font-size: 16px;
            font-family: 'David Libre';
            direction: rtl;
        }
        #MainMenu {visibility: hidden;}
        footer {visibility: hidden;}
    </style>
    """
    st.write(style, unsafe_allow_html=True)

    if text and btn:
        # Local pipeline: run the model and let displaCy render its entities.
        doc = nlp(text)
        html = displacy.render(
            doc,
            style="ent",
            options={"direction": "rtl"},
            manual=False,
        )

        nemo_html = get_html(html)
        # Remote service: same text, annotated by the IAHLT API.
        iahlt_html = get_html_from_server(text)

        html = f"""
        <div style="display: flex; flex-direction: row; justify-content: space-between; direction: rtl">
        <div>
        <h3>Nemo model results</h3>
        {nemo_html}
        </div>
        </div>
        <div style="display: flex; flex-direction: row; justify-content: space-between; direction: rtl">
        <div>
        <h3>IAHLT results</h3>
        {iahlt_html}
        </div>
        </div>
        """
        st.write(html, unsafe_allow_html=True)

    else:
        st.write("")
|
|
|