anonymise_this / app.py
EC2 Default User
debug
a6314b2
raw
history blame
3.41 kB
import streamlit as st
from flair.data import Sentence
from flair.models import SequenceTagger
import re
import logging
# Page header: the app title followed by a one-line description.
st.title("Anonymise your text!")
st.markdown("This mini-app anonymises text using Bert. You can find the code on [GitHub(WIP)](#)")

# Root logger set-up: each record is emitted as a blank line, the timestamp,
# then the message. force=True replaces any handlers that are already
# attached to the root logger.
logging.basicConfig(
    format="\n%(asctime)s\n%(message)s",
    level=logging.INFO,
    force=True,
)
@st.cache(suppress_st_warning=True)
def load_tagger():
    """Load and return the Flair ``flair/ner-english-large`` sequence tagger.

    The function is wrapped in ``st.cache`` so the loaded tagger can be
    reused instead of being loaded again on every call.
    """
    model_name = "flair/ner-english-large"
    return SequenceTagger.load(model_name)
def _redact_entities(text: str, entities, placeholder: str = "<PID>") -> str:
    """Return *text* with every occurrence of each entity replaced by *placeholder*."""
    # Drop empty strings and duplicates, and try longer entities first so a
    # short entity ("John") cannot consume part of a longer one ("John Smith")
    # and leave the remainder unredacted.
    candidates = sorted({e for e in entities if e}, key=lambda e: (-len(e), e))
    if not candidates:
        # An empty alternation compiles to a pattern that matches the empty
        # string at every position; substituting with it would insert the
        # placeholder between all characters. Nothing to redact: return as-is.
        return text
    pattern = re.compile("|".join(map(re.escape, candidates)))
    return pattern.sub(placeholder, text)


def anonymise_text(text: str, metadata: str = "", white_listed_words: str = ""):
    """Redact named entities in *text* and publish the result via session state.

    Used as the ``on_click`` callback of the "Anonymise text" button. The
    function returns ``None``; its outputs are written to ``st.session_state``:

    * ``text_anon``  - the input with every detected entity replaced by ``<PID>``.
    * ``text_error`` - a user-facing message, or ``""`` on success.
    * ``n_requests`` - incremented on success; reset to 1 once the limit of 5
      is reached (that call is rejected with an error message).

    Args:
        text: The text to anonymise. An empty value is rejected with an error.
        metadata: Currently only included in the log line; it does not affect
            which spans are redacted.
        white_listed_words: Currently only included in the log line; it does
            not affect which spans are redacted.
    """
    # Simple per-session throttle: reject the call once 5 requests have been
    # counted, then restart the counter.
    if st.session_state.n_requests >= 5:
        st.session_state.text_error = "Too many requests. Please wait a few seconds before anonymising more text."
        logging.info(f"Session request limit reached: {st.session_state.n_requests}")
        st.session_state.n_requests = 1
        return

    st.session_state.text = ""
    st.session_state.text_error = ""

    if not text:
        st.session_state.text_error = "Please enter your text"
        return

    with text_spinner_placeholder:
        with st.spinner("Please wait while your text is being anonymised..."):
            tagger = load_tagger()
            sentence = Sentence(text)
            # Predict NER tags, then collect the surface text of each span.
            tagger.predict(sentence)
            entities = [span.text for span in sentence.get_spans("ner")]
            text_anon = _redact_entities(text, entities)

    st.session_state.text_error = ""
    st.session_state.n_requests += 1
    st.session_state.text_anon = text_anon
    # NOTE(review): this writes the raw, un-anonymised input to the log.
    # Confirm that is acceptable for an anonymisation tool.
    logging.info(
        f"text: {text}{metadata}{white_listed_words}\n"
        f"text anonymised: {st.session_state.text_anon}"
    )
# def anonymise_text(text: str, metadata: str = "", white_listed_words: str = ""):
# st.session_state.text_anon = "this is anonymised"
# Seed the session state on the first run only; values that already exist
# (from earlier runs of the same session) are left untouched.
for state_key, initial_value in (
    ("text", ""),
    ("text_error", ""),
    ("text_anon", ""),
    ("n_requests", 0),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value
# Input fields: the text to anonymise plus two optional free-text fields.
text = st.text_input(
    label="Text to be anonymised",
    placeholder="Write your text here",
)
metadata = st.text_input(label="Data to be redacted (optional)", placeholder="inspirational")
white_listed_words = st.text_input(label="Data to be ignored (optional)", placeholder="inspirational")

# The button's return value is True when it was clicked. The work itself is
# done by the on_click callback, which receives the current field values.
anonymise_now = st.button(
    label="Anonymise text",
    type="primary",
    on_click=anonymise_text,
    args=(text, metadata, white_listed_words),
)

# Empty slot below the button; anonymise_text enters it to show its spinner.
text_spinner_placeholder = st.empty()
# Show the error message, if the last callback left one.
error_message = st.session_state.text_error
if error_message:
    st.error(error_message)

# Show the anonymised text under a horizontal rule, if there is any.
anonymised = st.session_state.text_anon
if anonymised:
    st.markdown("---")
    st.text_area(label="Text anonymised", value=anonymised, height=100)