Spaces:
Runtime error
Runtime error
""" | |
This code was inspired by https://huggingface.co/spaces/HugoLaurencon/examples_before_after_pii/
and https://huggingface.co/spaces/SaulLu/diff-visualizer
""" | |
import streamlit as st | |
from datasets import load_dataset | |
import diff_viewer | |
# Page chrome: browser tab title, wide layout, visible heading and intro text.
st.set_page_config(layout="wide", page_title="PII Visualization")
st.title("PII Anonymization 🔐")
# Adjacent string literals are concatenated into a single markdown paragraph.
st.markdown(
    "This demo allows the visualization of personal information anonymization on some code files. "
    "This is just an illustration of [BigCode's PII pipeline](https://github.com/bigcode-project/bigcode-dataset/tree/main/pii) results and the examples and secrets are **synthetic**."
)
def load_data(language="python"):
    """Load the "train" split of the dataset stored under ``data``.

    The original note on this function describes the dataset as holding
    modified files with ``content``, ``references`` and ``language`` columns.

    Args:
        language: Accepted for interface compatibility; the value is not
            used, so the same dataset is returned for every language.

    Returns:
        Whatever ``load_dataset("data", split="train")`` returns.
    """
    return load_dataset("data", split="train")
def get_samples_tag(dataset, tag):
    """Return the positions of the samples whose references mention ``tag``.

    Args:
        dataset: Object supporting ``len()``, ``add_column`` and ``filter``,
            whose rows expose a ``"references"`` entry (presumably a Hugging
            Face ``Dataset`` as returned by ``load_data`` - confirm with
            callers).
        tag: Name of the PII tag. It is upper-cased and prefixed with
            ``"PI:"`` before matching, e.g. ``"email"`` -> ``"PI:EMAIL"``.

    Returns:
        The ``"index"`` column of the filtered dataset, i.e. the original
        positions of the rows whose ``"references"`` contain the marker.
    """
    # Build the marker once instead of for every row visited by the filter.
    marker = "PI:" + tag.upper()
    # Attach each row's original position so it survives the filtering.
    # The range is materialized because ``add_column`` is documented to take
    # a list or NumPy array.
    indexed = dataset.add_column("index", list(range(len(dataset))))
    matching = indexed.filter(lambda row: marker in row["references"])
    return matching["index"]
# --- Example selection ------------------------------------------------------
col1, col2 = st.columns([2, 4])
with col1:
    # TODO: add examples in more languages
    lang = st.selectbox("Select a programming language", ["Python"])

samples = load_data(language=lang.lower())
max_docs = len(samples)
if max_docs == 0:
    # With no examples, max_value below would be -1, which is lower than both
    # min_value and the default value of 0; report the problem and stop here.
    st.error("The dataset is empty: there are no examples to display.")
    st.stop()

with col1:
    index_example = st.number_input(
        f"Choose an example from the existing {max_docs}:",
        min_value=0,
        max_value=max_docs - 1,
        value=0,
        step=1,
    )

# --- Before/after comparison ------------------------------------------------
st.markdown("Below we highlight the difference in code before and after the PII on the chosen synthetic example:")
example = samples[index_example]
# Number of "PI:" markers in the example's references. Not used in the
# visible part of the script; kept in case later code relies on it.
delimiter = "PI:"
count = example["references"].count(delimiter)

# The narrow first column only acts as a spacer in front of the two headers.
col1, col2, col3 = st.columns([0.4, 1, 1])
with col2:
    st.subheader("Code before PII redaction")
with col3:
    st.subheader("Code after PII redaction")
diff_viewer.diff_viewer(old_text=example["content"], new_text=example["new_content"], lang="none")