the-stack-inspection

Sleeping

App Files Files Community

the-stack-inspection / app.py

loubnabnl HF staff

Duplicate from bigcode/pii-test

461c45d over 1 year ago

raw

history blame

No virus

1.87 kB

	"""
	This code was adapted from https://huggingface.co/spaces/HugoLaurencon/examples_before_after_pii/
	"""

	import html
	import json

	import pandas as pd
	import streamlit as st

	# Page-level Streamlit setup: browser tab title, wide layout, and the heading.
	st.set_page_config(layout="wide", page_title="PII Visualization")
	st.title("PII Visualization")

	# PII categories offered in the tag selector.
	tags = [
	    "KEY",
	    "IP_ADDRESS",
	    "EMAIL",
	]

	# Detection outcomes offered in the type selector, in display order.
	types = [
	    "False positives",
	    "False negatives",
	]

	# Short code for each detection type; it appears in the data file names
	# and (upper-cased) in the inline marker that opens a detection.
	matches = {
	    "False negatives": "fn",
	    "False positives": "fp",
	}

	@st.cache()
	def load_data():
	    """Load the detection samples for the current selection.

	    Reads the module-level ``chosen_tag`` and ``chosen_type`` (assigned by
	    the selector widgets later in the script, before this is called) and
	    opens ``data/<tag in lower case>_detections_<fn|fp>.json``.

	    Returns:
	        The parsed JSON content of that file.

	    Raises:
	        FileNotFoundError: if no data file exists for the selection.
	        json.JSONDecodeError: if the file is not valid JSON.
	    """
	    # NOTE(review): the function takes no arguments, so a correct cache hit
	    # depends on the cache noticing changes to the globals read here.
	    # Confirm this holds before swapping the decorator for another cache.
	    path = f"data/{chosen_tag.lower()}_detections_{matches[chosen_type]}.json"
	    # Decode explicitly as UTF-8 so the result does not depend on the
	    # platform's locale encoding.
	    with open(path, "r", encoding="utf-8") as f:
	        return json.load(f)

	# Two narrow columns hold the selectors; the wide third column is not
	# used anywhere in the visible script.
	col1, col2, col3 = st.columns([1, 1, 4])

	# Which kind of detection error to browse.
	chosen_type = col1.selectbox(
	    label="Select the type of detections",
	    options=types,
	    index=0,
	)

	# Which PII category to browse.
	chosen_tag = col2.selectbox(
	    label="Select the PII TAG",
	    options=tags,
	    index=0,
	)

	# Samples for the current (type, tag) selection.
	samples = load_data()
	max_docs = len(samples)

	# With no samples, max_value below would be -1, i.e. smaller than
	# min_value and the default value of 0. Report that and end this run
	# instead of failing inside the widget.
	if not samples:
	    st.warning(f"No {chosen_type.lower()} found for the {chosen_tag} tag.")
	    st.stop()

	col1, col2 = st.columns([2, 4])
	with col1:
	    # All numeric arguments are ints, so the returned value can be used
	    # directly as a list index.
	    index_example = st.number_input(
	        f"Index of the chosen example from the existing {max_docs}",
	        min_value=0,
	        max_value=max_docs - 1,
	        value=0,
	        step=1,
	    )

	st.write("Scroll down to visualize PII detections highlighted in yellow, we split the text at the start and end of the key to highlight it.")

	detection = samples[index_example]
	# Inline markers in the sample text: "PI:FN" / "PI:FP" opens a detection
	# and "END_PI" closes it.
	delimiter = f"PI:{matches[chosen_type].upper()}"
	end_marker = "END_PI"
	count = detection.count(delimiter)

	st.subheader(f"{count} {chosen_type.lower()} for {chosen_tag} tag in example {index_example}:")

	# Walk the text left to right, alternating between a plain code panel for
	# the text before each detection and a yellow highlight for the detection.
	advance = 0
	while True:
	    start = detection.find(delimiter, advance)
	    if start == -1:
	        break
	    # Search for the closing marker only after the opening one, so a stray
	    # "END_PI" earlier in the remaining text cannot yield end < start.
	    closing = detection.find(end_marker, start + len(delimiter))
	    # An unterminated detection is highlighted through the end of the text
	    # rather than raising.
	    end = len(detection) if closing == -1 else closing + len(end_marker)
	    st.code(detection[advance:start])
	    # The excerpt comes from arbitrary source files and is rendered with
	    # unsafe_allow_html=True, so escape it so that any "<", ">" or "&" is
	    # displayed literally instead of being parsed as markup.
	    st.markdown(
	        '<span style="background-color: #FFFF00">'
	        + html.escape(detection[start:end])
	        + "</span>",
	        unsafe_allow_html=True,
	    )
	    advance = end

	# Whatever follows the last detection (the whole text if there was none).
	st.code(detection[advance:])