arogeriogel committed on
Commit
2e33ce2
1 Parent(s): 981d935
Files changed (2) hide show
  1. app.py +16 -24
  2. requirements.txt +5 -5
app.py CHANGED
@@ -15,7 +15,7 @@ from detoxify import Detoxify
15
 
16
  st.title("Anonymise your text!")
17
  st.markdown(
18
- "This mini-app anonymises text using Flair and Presidio. You can find the code in the Files and versions tab above. The development of this app was inspired by previous work, namely this [pii-anonimyzer](https://huggingface.co/spaces/beki/pii-anonymizer)"
19
  )
20
 
21
  # Configure logger
@@ -25,7 +25,7 @@ logging.basicConfig(format="\n%(asctime)s\n%(message)s", level=logging.INFO, for
25
  ###### Define functions ######
26
  ##############################
27
 
28
- @st.cache(allow_output_mutation=True,show_spinner=False)
29
  def analyzer_engine():
30
  """Return AnalyzerEngine."""
31
  analyzer = AnalyzerEngine()
@@ -40,19 +40,6 @@ def analyze(**kwargs):
40
  if "entities" not in kwargs or "All" in kwargs["entities"]:
41
  kwargs["entities"] = None
42
 
43
- # if st.session_state.excluded_words:
44
-
45
- # deny_list = [i.strip() for i in st.session_state.excluded_words.split(',')]
46
-
47
- # logging.info(
48
- # f"words excluded : {deny_list}\n"
49
- # )
50
-
51
- # excluded_words_recognizer = PatternRecognizer(supported_entity="MANUAL ADD",
52
- # name="Excluded words recognizer",
53
- # deny_list=deny_list)
54
- # analyzer_engine().registry.add_recognizer(excluded_words_recognizer)
55
-
56
  results = analyzer_engine().analyze(**kwargs)
57
  st.session_state.analyze_results = results
58
 
@@ -119,17 +106,17 @@ def analyze_text():
119
  )
120
 
121
  if st.session_state.excluded_words:
122
- include_manual_input()
123
 
124
  if st.session_state.allowed_words:
125
- exclude_manual_input()
126
 
127
  logging.info(
128
  f"analyse results: {st.session_state.analyze_results}\n"
129
  )
130
 
131
 
132
- def include_manual_input():
133
  deny_list = [i.strip() for i in st.session_state.excluded_words.split(',')]
134
  def _deny_list_to_regex(deny_list):
135
  """
@@ -161,7 +148,13 @@ def include_manual_input():
161
  score=1.0,
162
  )
163
 
164
- results.append(pattern_result)
 
 
 
 
 
 
165
 
166
  results = EntityRecognizer.remove_duplicates(results)
167
 
@@ -171,7 +164,7 @@ def include_manual_input():
171
  f"analyse results after adding excluded words: {st.session_state.analyze_results}\n"
172
  )
173
 
174
- def exclude_manual_input():
175
  analyze_results_fltered=[]
176
 
177
  for token in st.session_state.analyze_results:
@@ -182,7 +175,7 @@ def exclude_manual_input():
182
  )
183
  st.session_state.analyze_results = analyze_results_fltered
184
 
185
- @st.cache(allow_output_mutation=True)
186
  def anonymizer_engine():
187
  """Return AnonymizerEngine."""
188
  return AnonymizerEngine()
@@ -215,7 +208,6 @@ def anonymise_text():
215
  def clear_results():
216
  st.session_state.anon_results=""
217
  st.session_state.analyze_results=""
218
- # analyzer_engine().registry.remove_recognizer("Excluded words recognizer")
219
 
220
  #######################################
221
  #### Initialize "global" variables ####
@@ -305,10 +297,10 @@ with col1:
305
  annotated_text(*annotated_tokens)
306
  st.write(st.session_state.analyze_results)
307
  if not st.session_state.analyze_results and analyze_now and not st.session_state.text_error:
308
- st.write("No PII was found.")
309
 
310
  with col2:
311
  if st.session_state.anon_results:
312
  st.write(st.session_state.anon_results.text)
313
  if not st.session_state.analyze_results and anonymise_now and not st.session_state.text_error:
314
- st.write("No PII was found.")
 
15
 
16
  st.title("Anonymise your text!")
17
  st.markdown(
18
+ "This mini-app anonymises text using Flair and Presidio. You can find the code in the Files and Versions tabs in the [HuggingFace page](https://huggingface.co/spaces/arogeriogel/anonymise_this)"
19
  )
20
 
21
  # Configure logger
 
25
  ###### Define functions ######
26
  ##############################
27
 
28
+ @st.cache_resource(show_spinner="Fetching model from cache...")
29
  def analyzer_engine():
30
  """Return AnalyzerEngine."""
31
  analyzer = AnalyzerEngine()
 
40
  if "entities" not in kwargs or "All" in kwargs["entities"]:
41
  kwargs["entities"] = None
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  results = analyzer_engine().analyze(**kwargs)
44
  st.session_state.analyze_results = results
45
 
 
106
  )
107
 
108
  if st.session_state.excluded_words:
109
+ exclude_manual_input()
110
 
111
  if st.session_state.allowed_words:
112
+ allow_manual_input()
113
 
114
  logging.info(
115
  f"analyse results: {st.session_state.analyze_results}\n"
116
  )
117
 
118
 
119
+ def exclude_manual_input():
120
  deny_list = [i.strip() for i in st.session_state.excluded_words.split(',')]
121
  def _deny_list_to_regex(deny_list):
122
  """
 
148
  score=1.0,
149
  )
150
 
151
+ # check if already in detected strings
152
+ found=False
153
+ for token in st.session_state.analyze_results:
154
+ if token.start==start and token.end==end:
155
+ found=True
156
+ if found==False:
157
+ results.append(pattern_result)
158
 
159
  results = EntityRecognizer.remove_duplicates(results)
160
 
 
164
  f"analyse results after adding excluded words: {st.session_state.analyze_results}\n"
165
  )
166
 
167
+ def allow_manual_input():
168
  analyze_results_fltered=[]
169
 
170
  for token in st.session_state.analyze_results:
 
175
  )
176
  st.session_state.analyze_results = analyze_results_fltered
177
 
178
+ @st.cache_resource(show_spinner="Fetching model from cache...")
179
  def anonymizer_engine():
180
  """Return AnonymizerEngine."""
181
  return AnonymizerEngine()
 
208
  def clear_results():
209
  st.session_state.anon_results=""
210
  st.session_state.analyze_results=""
 
211
 
212
  #######################################
213
  #### Initialize "global" variables ####
 
297
  annotated_text(*annotated_tokens)
298
  st.write(st.session_state.analyze_results)
299
  if not st.session_state.analyze_results and analyze_now and not st.session_state.text_error:
300
+ st.write("### No PII was found. ###")
301
 
302
  with col2:
303
  if st.session_state.anon_results:
304
  st.write(st.session_state.anon_results.text)
305
  if not st.session_state.analyze_results and anonymise_now and not st.session_state.text_error:
306
+ st.write("### No PII was found. ###")
requirements.txt CHANGED
@@ -1,7 +1,7 @@
1
  detoxify==0.5.1
2
- flair==0.11
3
- presidio-anonymizer
4
- presidio-analyzer
5
- st-annotated-text
6
- spacy>=3.0.0,<4.0.0
7
  https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.0.0/en_core_web_lg-3.0.0.tar.gz#egg=en_core_web_lg
 
1
  detoxify==0.5.1
2
+ flair==0.12.2
3
+ presidio-anonymizer==2.2.33
4
+ presidio-analyzer==2.2.33
5
+ st-annotated-text==4.0.1
6
+ spacy==3.7.1
7
  https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.0.0/en_core_web_lg-3.0.0.tar.gz#egg=en_core_web_lg