arogeriogel committed on
Commit
28951d0
1 Parent(s): 82660a6

including metadata

Browse files
Files changed (1) hide show
  1. app.py +34 -25
app.py CHANGED
@@ -3,7 +3,8 @@ import streamlit as st
3
  import re
4
  import logging
5
  from presidio_anonymizer import AnonymizerEngine
6
- from presidio_analyzer import AnalyzerEngine, RecognizerRegistry
 
7
  from annotated_text import annotated_text
8
  from flair_recognizer import FlairRecognizer
9
 
@@ -36,9 +37,17 @@ def analyzer_engine():
36
 
37
  def analyze(**kwargs):
38
  """Analyze input using Analyzer engine and input arguments (kwargs)."""
 
 
39
  if "entities" not in kwargs or "All" in kwargs["entities"]:
40
  kwargs["entities"] = None
41
- return analyzer_engine().analyze(**kwargs)
 
 
 
 
 
 
42
 
43
  def annotate():
44
  text = st.session_state.text
@@ -88,8 +97,8 @@ def analyze_text():
88
  return_decision_process=False,
89
  )
90
 
91
- if st.session_state.excluded_words:
92
- analyze_results = include_manual_input(analyze_results)
93
 
94
  if st.session_state.allowed_words:
95
  analyze_results = exclude_manual_input(analyze_results)
@@ -99,29 +108,29 @@ def analyze_text():
99
  logging.info(
100
  f"analyse results: {st.session_state.analyze_results}\n"
101
  )
102
-
103
 
104
- def include_manual_input(analyze_results):
105
- analyze_results_extended=analyze_results
106
- logging.info(
107
- f"analyse results before adding extra words: {analyze_results}\n"
108
- )
109
- for word in st.session_state.excluded_words:
110
- if word in st.session_state.text:
111
- r = re.compile(word)
112
- index_entries = [[m.start(),m.end()] for m in r.finditer(st.session_state.text)]
113
- for entry in index_entries:
114
- start=entry[0]
115
- end=entry[1]
 
116
 
117
- analyze_results_extended.append({"type": "MANUAL ADD", "start": start, "end": end, "score": 1.0})
118
- logging.info(
119
- f"analyse results after adding allowed words: {analyze_results_extended}\n"
120
- )
121
- logging.info(
122
- f"type of entries in results: {type(analyze_results[0])}\n"
123
- )
124
- return analyze_results_extended
125
 
126
  def exclude_manual_input(analyze_results):
127
  analyze_results_fltered=[]
 
3
  import re
4
  import logging
5
  from presidio_anonymizer import AnonymizerEngine
6
+ from presidio_analyzer import AnalyzerEngine, PatternRecognizer
7
+
8
  from annotated_text import annotated_text
9
  from flair_recognizer import FlairRecognizer
10
 
 
37
 
38
  def analyze(**kwargs):
39
  """Analyze input using Analyzer engine and input arguments (kwargs)."""
40
+ analyzer_engine = analyzer_engine()
41
+
42
  if "entities" not in kwargs or "All" in kwargs["entities"]:
43
  kwargs["entities"] = None
44
+
45
+ if st.session_state.excluded_words:
46
+ excluded_words_recognizer = PatternRecognizer(supported_entity="MANUAL ADD",
47
+ deny_list=st.session_state.excluded_words)
48
+ analyzer_engine.registry.add_recognizer(excluded_words_recognizer)
49
+
50
+ return analyzer_engine.analyze(**kwargs)
51
 
52
  def annotate():
53
  text = st.session_state.text
 
97
  return_decision_process=False,
98
  )
99
 
100
+ # if st.session_state.excluded_words:
101
+ # analyze_results = include_manual_input(analyze_results)
102
 
103
  if st.session_state.allowed_words:
104
  analyze_results = exclude_manual_input(analyze_results)
 
108
  logging.info(
109
  f"analyse results: {st.session_state.analyze_results}\n"
110
  )
 
111
 
112
+
113
+ # def include_manual_input(analyze_results):
114
+ # analyze_results_extended=analyze_results
115
+ # logging.info(
116
+ # f"analyse results before adding extra words: {analyze_results}\n"
117
+ # )
118
+ # for word in st.session_state.excluded_words:
119
+ # if word in st.session_state.text:
120
+ # r = re.compile(word)
121
+ # index_entries = [[m.start(),m.end()] for m in r.finditer(st.session_state.text)]
122
+ # for entry in index_entries:
123
+ # start=entry[0]
124
+ # end=entry[1]
125
 
126
+ # analyze_results_extended.append({"type": "MANUAL ADD", "start": start, "end": end, "score": 1.0})
127
+ # logging.info(
128
+ # f"analyse results after adding allowed words: {analyze_results_extended}\n"
129
+ # )
130
+ # logging.info(
131
+ # f"type of entries in results: {type(analyze_results[0])}\n"
132
+ # )
133
+ # return analyze_results_extended
134
 
135
  def exclude_manual_input(analyze_results):
136
  analyze_results_fltered=[]