pii-public-demo

Runtime error

App Files Files Community

pii-public-demo / app.py

loubnabnl HF Staff

update pii link

a44cfc4 over 2 years ago

raw

history blame contribute delete

1.94 kB

	"""
	This code was inspired by https://huggingface.co/spaces/HugoLaurencon/examples_before_after_pii/
	and https://huggingface.co/spaces/SaulLu/diff-visualizer
	"""

	import streamlit as st
	from datasets import load_dataset
	import diff_viewer

	# Page chrome: set the browser-tab title and request the full-width ("wide") layout.
	st.set_page_config(page_title="PII Visualization", layout="wide")
	st.title("PII Anonymization 🔐")

	# Introductory paragraph rendered as Markdown. The trailing backslash continues
	# the string literal onto the next source line, so any leading whitespace on
	# that line is part of the rendered text.
	st.markdown("This demo allows the visualization of personal information anonymization on some code files. \
	This is just an illustration of [BigCode's PII pipeline](https://github.com/bigcode-project/bigcode-dataset/tree/main/pii) results and the examples and secrets are synthetic.")

	@st.cache()
	def load_data(language="python"):
	    """Load the ``train`` split of the local ``data`` dataset.

	    Per the original author's note, the dataset holds the modified files
	    with ``content``, ``references`` and ``language`` columns.

	    Args:
	        language: Currently unused by the body; the same split is loaded
	            whatever value is passed.

	    Returns:
	        The object returned by ``load_dataset("data", split="train")``.
	    """
	    # NOTE(review): ``st.cache`` is deprecated in recent Streamlit releases in
	    # favour of ``st.cache_data`` / ``st.cache_resource`` -- confirm against
	    # the Streamlit version this app is pinned to before switching.
	    return load_dataset("data", split="train")


	def get_samples_tag(dataset, tag):
	    """Return the row positions of the samples whose references mention *tag*.

	    A row matches when ``"PI:" + tag.upper()`` is found (``in``) in its
	    ``references`` field.

	    Args:
	        dataset: Dataset-like object supporting ``len()``, ``add_column`` and
	            ``filter``, and exposing a ``references`` column.
	        tag: Tag name to look for; it is upper-cased before matching.

	    Returns:
	        The ``index`` column of the filtered dataset, i.e. the positions of
	        the matching rows in *dataset*.
	    """
	    # Build the marker once rather than once per row inside the filter.
	    marker = "PI:" + tag.upper()
	    # Attach each row's position so it can be recovered after filtering.
	    # ``add_column`` documents a list/array column, so materialise the range.
	    indexed = dataset.add_column("index", list(range(len(dataset))))
	    matching = indexed.filter(lambda row: marker in row["references"])
	    return matching["index"]


	# Two-column row (relative widths 2:4); the widgets below go in the first one.
	col1, col2 = st.columns([2, 4])
	# TODO add examples in more languages
	lang = col1.selectbox("Select a programming language", ["Python"])

	# The selected language is lower-cased and forwarded to the loader.
	samples = load_data(language=lang.lower())
	max_docs = len(samples)

	# Example picker: valid indices run from 0 to max_docs - 1.
	index_example = col1.number_input(
	    f"Choose an example from the existing {max_docs}:",
	    min_value=0,
	    max_value=max_docs - 1,
	    value=0,
	    step=1,
	)


	st.markdown("Below we highlight the difference in code before and after the PII on the chosen synthetic example:")

	# Row selected through the number input above.
	example = samples[index_example]
	delimiter = "PI:"
	# Number of "PI:" occurrences in this example's references.
	count = example["references"].count(delimiter)

	# Header row with relative widths 0.4 : 1 : 1; nothing is written into the
	# first column, the other two carry the captions.
	col1, col2, col3 = st.columns([0.4, 1, 1])
	col2.subheader("Code before PII redaction")
	col3.subheader("Code after PII redaction")

	# Pass the original and redacted text to the diff_viewer component.
	diff_viewer.diff_viewer(
	    old_text=example["content"],
	    new_text=example["new_content"],
	    lang="none",
	)