File size: 1,867 Bytes
461c45d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
"""
This code was adapted from https://huggingface.co/spaces/HugoLaurencon/examples_before_after_pii/
"""

import html
import json

import pandas as pd
import streamlit as st

# Page chrome: the same text is used for the browser tab title and the heading.
_PAGE_TITLE = "PII Visualization"
st.set_page_config(page_title=_PAGE_TITLE, layout="wide")
st.title(_PAGE_TITLE)

# PII tags offered in the tag selector.
tags = [
    "KEY",
    "IP_ADDRESS",
    "EMAIL",
]

# Detection categories offered in the type selector.
types = [
    "False positives",
    "False negatives",
]

# Short code for each detection category; used in the data file name and,
# upper-cased, in the "PI:<CODE>" delimiter searched for in the samples.
matches = {
    "False negatives": "fn",
    "False positives": "fp",
}

@st.cache()
def load_data():
    """Load the detection samples for the currently selected tag and type.

    Reads the module-level ``chosen_tag`` and ``chosen_type`` selections and
    opens ``data/<tag lower-cased>_detections_<fp|fn>.json``.

    Returns:
        The parsed JSON content of that file. The caller indexes it and calls
        ``str.count`` on the items, so it is presumably a list of strings —
        verify against the data files.

    Raises:
        FileNotFoundError: Propagated from ``open`` when no data file exists
            for the selected tag/type combination.
    """
    # NOTE(review): this function takes no arguments, so the cache can only
    # distinguish selections through the globals read below. Legacy
    # ``st.cache`` is documented to hash referenced external variables;
    # confirm this still holds before migrating to another caching API.
    path = f"data/{chosen_tag.lower()}_detections_{matches[chosen_type]}.json"
    # Decode explicitly as UTF-8 so loading does not depend on the platform's
    # default locale encoding.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
    
# --- Selection widgets: detection type and PII tag --------------------------
col1, col2, col3 = st.columns([1, 1, 4])
with col1:
    chosen_type = st.selectbox(
        label="Select the type of detections",
        options=types,
        index=0,
    )
with col2:
    chosen_tag = st.selectbox(
        label="Select the PII TAG",
        options=tags,
        index=0,
    )

# load_data() reads chosen_type / chosen_tag, so it must run after the widgets.
samples = load_data()
max_docs = len(samples)

# With no samples, the index picker below would be given max_value=-1, which
# is below min_value=0; stop this script run with a message instead.
if max_docs == 0:
    st.warning(f"No {chosen_type.lower()} available for the {chosen_tag} tag.")
    st.stop()

# --- Example picker ---------------------------------------------------------
col1, col2 = st.columns([2, 4])
with col1:
    index_example = st.number_input(
        f"Index of the chosen example from the existing {max_docs}",
        min_value=0,
        max_value=max_docs - 1,
        value=0,
        step=1,
    )

st.write("Scroll down to visualize PII detections highlighted in yellow, we split the text at the start and end of the key to highlight it.")

# --- Highlight every detection in the chosen example ------------------------
detection = samples[index_example]
# Each detection starts with "PI:FP" / "PI:FN" and runs through "END_PI".
delimiter = f"PI:{matches[chosen_type].upper()}"
end_marker = "END_PI"
count = detection.count(delimiter)

st.subheader(f"{count} {chosen_type.lower()} for {chosen_tag} tag in example {index_example}:")

# Walk the text left to right: plain text goes into a code block, each
# delimiter..END_PI span goes into a yellow-highlighted span.
advance = 0
while True:
    start = detection.find(delimiter, advance)
    if start == -1:
        break
    # Search for the end marker only after the delimiter, so a stray marker
    # earlier in the text cannot produce an end position before the start.
    marker_pos = detection.find(end_marker, start)
    if marker_pos == -1:
        # Unterminated detection: highlight through the end of the text.
        end = len(detection)
    else:
        end = marker_pos + len(end_marker)
    st.code(detection[advance:start])
    # The sample text is arbitrary content; escape it so characters such as
    # "<" and "&" are displayed literally rather than interpreted as HTML.
    st.markdown(
        '<span style="background-color: #FFFF00">'
        + html.escape(detection[start:end])
        + "</span>",
        unsafe_allow_html=True,
    )
    advance = end
# Whatever follows the last detection (or the whole text if there was none).
st.code(detection[advance:])