Spaces:
Runtime error
Runtime error
import streamlit as st | |
from flair.data import Sentence | |
from flair.models import SequenceTagger | |
import re | |
import logging | |
# Static page header: the title first, then a one-line description of the app.
PAGE_TITLE = "Anonymise your text!"
PAGE_INTRO = "This mini-app anonymises text using Bert. You can find the code on [GitHub(WIP)](#)"
st.title(PAGE_TITLE)
st.markdown(PAGE_INTRO)

# Root logger setup: timestamp and message on separate lines, INFO and above.
# force=True replaces any handlers that were already attached to the root logger.
LOG_FORMAT = "\n%(asctime)s\n%(message)s"
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT, force=True)
@st.cache_resource(show_spinner=False)
def load_tagger():
    """Return the Flair NER tagger, loading it only on the first call.

    Streamlit re-executes this script on every interaction, so an uncached
    loader would reload the model for each anonymisation request.
    ``st.cache_resource`` keeps a single loaded instance and hands it back on
    later calls. The built-in cache spinner is disabled because the caller
    already shows its own spinner while the text is being processed.

    Returns:
        The object returned by ``SequenceTagger.load("flair/ner-english-large")``.
    """
    return SequenceTagger.load("flair/ner-english-large")
def _redact_entities(text: str, entities, placeholder: str = "<PID>") -> str:
    """Return *text* with every occurrence of each entity string replaced by *placeholder*."""
    # Drop empty strings and duplicates. Longer entities go first so that a
    # short entity which is a prefix of a longer one (e.g. "John" vs
    # "John Smith") cannot win the alternation and leave the tail unredacted.
    candidates = sorted({e for e in entities if e}, key=lambda e: (-len(e), e))
    if not candidates:
        # An empty alternation compiles to a pattern that matches at every
        # position; substituting with it would insert the placeholder between
        # all characters of the text. With nothing to redact, return as-is.
        return text
    pattern = re.compile("|".join(map(re.escape, candidates)))
    # A callable replacement keeps the placeholder literal (no backslash
    # escape processing by the regex engine).
    return pattern.sub(lambda _match: placeholder, text)


def anonymise_text(text: str, metadata: str = "", white_listed_words: str = ""):
    """Anonymise *text* and publish the result through ``st.session_state``.

    Used as the ``on_click`` callback of the "Anonymise text" button. Named
    entities predicted by the tagger are replaced with ``<PID>`` wherever
    their text occurs in the input.

    Args:
        text: The text to anonymise. Empty or whitespace-only input is
            rejected with an error message.
        metadata: Value of the "Data to be redacted" field.
        white_listed_words: Value of the "Data to be ignored" field.

    Returns:
        None. Results are written to ``st.session_state``:
        ``text_anon`` holds the anonymised text on success and
        ``text_error`` holds a message when the request is rejected.

    NOTE(review): ``metadata`` and ``white_listed_words`` are only written to
    the log; they do not influence what is redacted yet.
    NOTE(review): the raw input text is logged at INFO level - confirm this is
    acceptable for an anonymisation tool.
    """
    # Simple per-session throttle: every fifth successful request is followed
    # by one rejected request, after which the counter restarts at 1.
    if st.session_state.n_requests >= 5:
        st.session_state.text_error = "Too many requests. Please wait a few seconds before anonymising more text."
        logging.info(f"Session request limit reached: {st.session_state.n_requests}")
        st.session_state.n_requests = 1
        return

    st.session_state.text = ""
    st.session_state.text_error = ""

    if not text or not text.strip():
        st.session_state.text_error = "Please enter your text"
        return

    with text_spinner_placeholder:
        with st.spinner("Please wait while your text is being anonymised..."):
            tagger = load_tagger()
            sentence = Sentence(text)
            # Predict NER tags in place on the sentence object.
            tagger.predict(sentence)
            # Collect the surface text of every predicted entity span.
            entities = [span.text for span in sentence.get_spans("ner")]
            text_anon = _redact_entities(text, entities)

    st.session_state.text_error = ""
    st.session_state.n_requests += 1
    st.session_state.text_anon = text_anon
    logging.info(
        f"text: {text}{metadata}{white_listed_words}\n"
        f"text anonymised: {st.session_state.text_anon}"
    )
# def anonymise_text(text: str, metadata: str = "", white_listed_words: str = ""): | |
# st.session_state.text_anon = "this is anonymised" | |
# Seed the per-session state on first use only; keys that already exist keep
# their current value.
for _state_key, _initial_value in (
    ("text", ""),
    ("text_error", ""),
    ("text_anon", ""),
    ("n_requests", 0),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# --- Input widgets ---------------------------------------------------------
text = st.text_input(
    label="Text to be anonymised",
    placeholder="Write your text here",
)
metadata = st.text_input(label="Data to be redacted (optional)", placeholder="inspirational")
white_listed_words = st.text_input(label="Data to be ignored (optional)", placeholder="inspirational")

# st.button returns True on the run triggered by a click; the actual work is
# done by the on_click callback, which receives the current field values.
anonymise_now = st.button(
    "Anonymise text",
    type="primary",
    on_click=anonymise_text,
    args=(text, metadata, white_listed_words),
)

# Empty slot below the button; anonymise_text renders its spinner inside it.
text_spinner_placeholder = st.empty()

# --- Output ----------------------------------------------------------------
# Show the error message, if any.
if st.session_state.text_error:
    st.error(st.session_state.text_error)

# Show the anonymised text under a horizontal rule once there is a result.
if st.session_state.text_anon:
    st.markdown("---")
    st.text_area(
        label="Text anonymised",
        value=st.session_state.text_anon,
        height=100,
    )