Spaces:
Runtime error
Runtime error
File size: 3,407 Bytes
87d5615 f7ca36c 2414dea 87d5615 2414dea 9e72f39 2414dea f7ca36c 9e72f39 f7ca36c e387f6f 34cc4af 42cb2f7 b06718d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
import streamlit as st
from flair.data import Sentence
from flair.models import SequenceTagger
import re
import logging
# Page header: the app title followed by a one-line description.
st.title("Anonymise your text!")
st.markdown("This mini-app anonymises text using Bert. You can find the code on [GitHub(WIP)](#)")

# Root logger at INFO level; the timestamp and the message each get their own
# line. force=True replaces any handlers already attached to the root logger,
# so the format applies even if logging was configured before this point.
logging.basicConfig(
    level=logging.INFO,
    format="\n%(asctime)s\n%(message)s",
    force=True,
)
# allow_output_mutation=True: the cached value is a large model object.
# Without the flag, the legacy st.cache API hashes the returned object to
# detect mutation, which is slow (and can fail) for a model of this size.
@st.cache(allow_output_mutation=True, suppress_st_warning=True)
def load_tagger():
    """Load the ``flair/ner-english-large`` sequence tagger.

    The result is cached by Streamlit, so the model is loaded once and
    reused by subsequent calls instead of being reloaded on every request.

    Returns:
        The object returned by ``SequenceTagger.load``.
    """
    return SequenceTagger.load("flair/ner-english-large")
def _redact_entities(text: str, entities) -> str:
    """Return *text* with every occurrence of each entity string replaced by ``<PID>``."""
    # Drop empty strings and duplicates, then put the longest entities first so
    # that "John Smith" is tried before "John" in the alternation; otherwise
    # the shorter alternative could match and leave " Smith" unredacted.
    candidates = sorted(
        dict.fromkeys(entity for entity in entities if entity),
        key=len,
        reverse=True,
    )
    if not candidates:
        # An empty alternation compiles to a pattern that matches the empty
        # string, and sub() would then insert "<PID>" at every position.
        return text
    pattern = re.compile("|".join(map(re.escape, candidates)))
    return pattern.sub("<PID>", text)


def anonymise_text(text: str, metadata: str = "", white_listed_words: str = ""):
    """Anonymise *text* and store the result in ``st.session_state.text_anon``.

    Named entities are detected with the tagger returned by ``load_tagger()``
    and every occurrence of a detected entity string is replaced by ``<PID>``.
    The function returns nothing: results and error messages are communicated
    through ``st.session_state`` (``text_anon`` and ``text_error``).

    Args:
        text: The text to anonymise. An empty value sets an error message
            and nothing is processed.
        metadata: Currently only written to the log; it does not influence
            what gets redacted.
        white_listed_words: Currently only written to the log; it does not
            influence what gets redacted.
    """
    state = st.session_state

    # Crude per-session throttle: once five requests have been counted, refuse
    # this one and restart the counter at 1.
    if state.n_requests >= 5:
        state.text_error = "Too many requests. Please wait a few seconds before anonymising more text."
        logging.info(f"Session request limit reached: {state.n_requests}")
        state.n_requests = 1
        return

    state.text = ""
    state.text_error = ""

    if not text:
        state.text_error = "Please enter your text"
        return

    with text_spinner_placeholder:
        with st.spinner("Please wait while your text is being anonymised..."):
            tagger = load_tagger()
            sentence = Sentence(text)
            # Predict NER tags in place on the sentence.
            tagger.predict(sentence)
            entities = [span.text for span in sentence.get_spans('ner')]
            text_anon = _redact_entities(text, entities)

    state.text_error = ""
    state.n_requests += 1
    state.text_anon = text_anon
    # NOTE(review): this writes the original, un-anonymised text to the log,
    # which may defeat the purpose of the app - confirm this is intended.
    logging.info(
        f"text: {text}{metadata}{white_listed_words}\n"
        f"text anonymised: {state.text_anon}"
    )
# def anonymise_text(text: str, metadata: str = "", white_listed_words: str = ""):
# st.session_state.text_anon = "this is anonymised"
# Seed the per-session state on the first run so that every later read
# (including those inside the anonymise_text callback) finds its key.
for _state_key, _initial_value in (
    ("text", ""),
    ("text_error", ""),
    ("text_anon", ""),
    ("n_requests", 0),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# Input widgets.
text = st.text_input(label="Text to be anonymised", placeholder="Write your text here")
metadata = st.text_input(
    label="Data to be redacted (optional)",
    placeholder="inspirational",
)
white_listed_words = st.text_input(
    label="Data to be ignored (optional)",
    placeholder="inspirational",
)

# st.button returns True on the run triggered by a click. The actual work is
# done by the on_click callback, which receives the current widget values.
anonymise_now = st.button(
    label="Anonymise text",
    type="primary",
    on_click=anonymise_text,
    args=(text, metadata, white_listed_words),
)

# Empty slot that anonymise_text uses as the container for its spinner.
text_spinner_placeholder = st.empty()

# Show whatever the callback left in session state.
if st.session_state.text_error:
    st.error(st.session_state.text_error)

if st.session_state.text_anon:
    st.markdown("""---""")
    st.text_area(label="Text anonymised", value=st.session_state.text_anon, height=100)