File size: 1,119 Bytes
41e004f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from presidio_helpers import analyzer_engine, analyze, anonymize


def test_streamlit_logic():
    """Smoke-test the analyze -> anonymize flow built on ``presidio_helpers``.

    Reads the text in ``demo_text.txt`` (resolved relative to the current
    working directory), runs it through ``analyze`` with the stanza/"en"
    configuration and feeds the findings to ``anonymize`` using the
    ``"replace"`` operator. The test passes when the anonymized text is
    not an empty string.
    """
    # Alternative configuration that can be swapped in manually:
    #   st_model = "StanfordAIMI/stanford-deidentifier-base"
    #   st_model_package = "HuggingFace"
    st_model = "en"
    st_model_package = "stanza"
    st_ta_key = None
    st_ta_endpoint = None

    # Positional arguments shared by analyzer_engine() and analyze().
    analyzer_params = (st_model_package, st_model, st_ta_key, st_ta_endpoint)

    # Read the default text in one go. The encoding is pinned so the test
    # does not depend on the platform's locale default.
    with open("demo_text.txt", encoding="utf-8") as f:
        st_text = f.read()

    # Build the AnalyzerEngine up front; the return value is intentionally
    # unused (presumably the helper caches the engine -- confirm in
    # presidio_helpers).
    analyzer_engine(*analyzer_params)

    # Analyze
    st_analyze_results = analyze(
        *analyzer_params,
        text=st_text,
        entities="All",
        language="en",
        score_threshold=0.35,
        return_decision_process=True,
        allow_list=[],
        deny_list=[],
    )

    # Anonymize
    st_anonymize_results = anonymize(
        text=st_text,
        operator="replace",
        mask_char=None,
        number_of_chars=None,
        encrypt_key=None,
        analyze_results=st_analyze_results,
    )

    assert st_anonymize_results.text != "", "anonymized text is empty"