File size: 3,407 Bytes
87d5615
f7ca36c
 
 
2414dea
87d5615
2414dea
 
 
 
 
 
 
 
9e72f39
 
 
 
2414dea
f7ca36c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9e72f39
f7ca36c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e387f6f
 
 
 
 
 
 
 
34cc4af
 
42cb2f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b06718d
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import streamlit as st
from flair.data import Sentence
from flair.models import SequenceTagger
import re 
import logging

# Page header: title followed by a one-line introduction.
st.title("Anonymise your text!")
_intro_markdown = (
    "This mini-app anonymises text using Bert. You can find the code on [GitHub(WIP)](#)"
)
st.markdown(_intro_markdown)

# Root logger: timestamp and message on separate lines, INFO level and above.
# force=True makes basicConfig replace any handlers already attached to the
# root logger instead of silently doing nothing.
logging.basicConfig(
    level=logging.INFO,
    format="\n%(asctime)s\n%(message)s",
    force=True,
)

@st.cache(suppress_st_warning=True, allow_output_mutation=True)
def load_tagger():
    """Return the Flair ``flair/ner-english-large`` sequence tagger.

    The function is wrapped in ``st.cache`` so the model is loaded once and
    then served from Streamlit's cache instead of being re-loaded each time
    the function is called.

    ``allow_output_mutation=True`` tells ``st.cache`` not to hash the returned
    object to check whether it was mutated between calls. The tagger is a
    large stateful model object; hashing it is expensive and the mutation
    check is not meaningful for it.
    """
    return SequenceTagger.load("flair/ner-english-large")

def _redact_entities(text, entities, replacement="<PID>"):
    """Replace every occurrence of each string in ``entities`` within ``text``."""
    # Drop duplicates and empty strings: an empty alternative (or an empty
    # pattern when there are no entities at all) matches at every position,
    # which would insert the replacement between every character.
    unique_entities = {entity for entity in entities if entity}
    if not unique_entities:
        return text
    # Longest first, so an entity that is a prefix of a longer one (e.g.
    # "John" vs "John Smith") cannot win the alternation and leave a
    # fragment of the longer entity unredacted.
    ordered = sorted(unique_entities, key=lambda entity: (-len(entity), entity))
    pattern = re.compile("|".join(map(re.escape, ordered)))
    return pattern.sub(replacement, text)


def anonymise_text(text: str, metadata: str = "", white_listed_words: str = ""):
    """Redact named entities in ``text`` and store the result in session state.

    Intended to be used as a Streamlit ``on_click`` callback. Nothing is
    returned; results are communicated through ``st.session_state``:

    * ``text_anon``  - the input with every detected entity replaced by
      ``<PID>`` (left unchanged when no entity is detected).
    * ``text_error`` - a user-facing message when the request is rejected
      (empty input or request limit reached), otherwise ``""``.
    * ``n_requests`` - incremented after each successful anonymisation.

    Args:
        text: The text to anonymise.
        metadata: Currently only written to the log; it does not affect the
            redaction.
        white_listed_words: Currently only written to the log; it does not
            affect the redaction.
    """
    # Per-session limit: once 5 requests have been counted, reject this call
    # and restart the counter at 1.
    if st.session_state.n_requests >= 5:
        st.session_state.text_error = "Too many requests. Please wait a few seconds before anonymising more text."
        logging.info(f"Session request limit reached: {st.session_state.n_requests}")
        st.session_state.n_requests = 1
        return

    st.session_state.text = ""
    st.session_state.text_error = ""

    if not text:
        st.session_state.text_error = "Please enter your text"
        return

    # text_spinner_placeholder is the module-level st.empty() slot; the
    # spinner is rendered inside it while the model runs.
    with text_spinner_placeholder:
        with st.spinner("Please wait while your text is being anonymised..."):
            tagger = load_tagger()
            sentence = Sentence(text)
            # Predict NER tags in place on the sentence.
            tagger.predict(sentence)
            entities = [span.text for span in sentence.get_spans("ner")]
            text_anon = _redact_entities(text, entities)

            st.session_state.n_requests += 1
            st.session_state.text_anon = text_anon
            # NOTE(review): this writes the raw, un-anonymised input to the
            # log, which may defeat the purpose of the app - confirm this is
            # intended before deploying with real data.
            logging.info(
                f"text: {text}{metadata}{white_listed_words}\n"
                f"text anonymised: {st.session_state.text_anon}"
            )
# def anonymise_text(text: str, metadata: str = "", white_listed_words: str = ""):
#     st.session_state.text_anon = "this is anonymised"

# Seed the session-state keys this page relies on, leaving any value that is
# already present untouched.
for _state_key, _initial_value in (
    ("text", ""),
    ("text_error", ""),
    ("text_anon", ""),
    ("n_requests", 0),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# Input widgets.
text = st.text_input(
    label="Text to be anonymised",
    placeholder="Write your text here",
)
metadata = st.text_input(label="Data to be redacted (optional)", placeholder="inspirational")
white_listed_words = st.text_input(label="Data to be ignored (optional)", placeholder="inspirational")

# st.button returns True when clicked; the actual work is done by the
# on_click callback, which receives the current widget values through args.
anonymise_now = st.button(
    label="Anonymise text",
    type="primary",
    on_click=anonymise_text,
    args=(text, metadata, white_listed_words),
)

# Empty slot that anonymise_text uses to display its spinner.
text_spinner_placeholder = st.empty()

# Show the error message, if any.
_error_message = st.session_state.text_error
if _error_message:
    st.error(_error_message)

# Show the anonymised text, if any, below a horizontal rule.
_anonymised = st.session_state.text_anon
if _anonymised:
    st.markdown("---")
    st.text_area(label="Text anonymised", value=_anonymised, height=100)