Spaces:
Running
Running
File size: 6,431 Bytes
c5fbe61 5d03634 c5fbe61 ee51c67 c5fbe61 bfe35e3 4c2841b bfe35e3 c5fbe61 47850ae b58c988 67d1057 82320dd 67d1057 7a26c3c 655a721 969e6e1 0879fb6 b58c988 47850ae c5fbe61 b58c988 c5fbe61 47850ae b58c988 c5fbe61 47850ae c5fbe61 b58c988 47850ae b58c988 c5fbe61 b58c988 c5fbe61 b58c988 c5fbe61 b58c988 c5fbe61 47850ae c5fbe61 b58c988 c5fbe61 b58c988 c5fbe61 b58c988 c5fbe61 b58c988 c5fbe61 b58c988 c5fbe61 b58c988 c5fbe61 b58c988 c5fbe61 47850ae |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
import gradio as gr
import os
from pii_transform.api.e2e import PiiTextProcessor
from pii_extract.defs import FMT_CONFIG_PLUGIN
def truncate_example(example, limit=50):
    """Return a short dropdown label for *example*.

    Texts longer than *limit* characters are cut to *limit* characters and
    suffixed with "..."; shorter texts are returned unchanged, so the
    ellipsis only appears when something was actually cut off.
    """
    if len(example) <= limit:
        return example
    return example[:limit] + "..."


def load_examples(path="examples.txt"):
    """Read the example texts from *path*, one example per line.

    Line terminators are removed and blank lines are skipped. A missing
    file yields an empty list, so the demo can still start (with an empty
    examples dropdown) instead of failing at import time.
    """
    try:
        # Explicit encoding: do not depend on the platform's locale default.
        with open(path, "r", encoding="utf-8") as f:
            return [line.rstrip("\r\n") for line in f if line.strip()]
    except FileNotFoundError:
        print(f"Examples file not found: {path}")
        return []


# Full example texts, and the shortened labels shown in the examples dropdown.
# Both lists share the same order, so a dropdown index selects the matching
# full text.
examples = load_examples()
examples_truncated = [truncate_example(example) for example in examples]
# Maps the language name shown in the language dropdown to the two-letter
# code handed to PiiTextProcessor as `lang`. Insertion order is the order in
# which the names are listed in the dropdown.
language_choices = dict(
    English="en",
    Italian="it",
    Spanish="es",
    Portuguese="pt",
    German="de",
    French="fr",
)

# Currently selected language code; starts on English and is rebound by
# change_language().
language_code = language_choices["English"]
# Directory used as the application cache; created at startup if missing.
cache_dir = "/home/user/app/cache"

# os.makedirs() reports failure by raising OSError rather than by returning,
# so the failure has to be caught for the warning to ever be issued (and for
# the app to keep starting without the cache directory).
# NOTE(review): gr.Info / gr.Warning are called here at import time, outside
# any event handler - confirm that these notifications are actually surfaced.
try:
    os.makedirs(cache_dir, exist_ok=True)
except OSError as err:
    print(f"Could not create {cache_dir}: {err}")
    gr.Warning("Cache directory creation error")
else:
    gr.Info("Cache directory created at "+cache_dir)
# Markdown shown in the "Help Panel" accordion.
# The backslashes in front of < and > must reach the Markdown text literally,
# so they are written as "\\": a bare "\<" is an invalid escape sequence in a
# normal string literal and triggers a warning on current Python versions.
policy_help_string = """
Policies are defined as follows:
1. **Annotate** - replace the PII instance by a \\<TYPE:VALUE\\> string, i.e. include both the PII type and its value
2. **Redact** - all PII instances are replaced by a \\<PII\\> generic string
3. **Placeholder** - replace with a prototypical value
4. **Synthetic** - substitute with synthetic data
For more information on the transformation policies, please refer to the guide [here](https://github.com/piisa/pii-transform/blob/main/doc/policies.md#pii-transformation-policies)"""
# Markdown rendered at the top of the page: project introduction, links and
# the feedback request. Written as adjacent literals, one per rendered line;
# the text starts and ends with a newline.
header_string = (
    "\n"
    "## [PIISA](https://privacyprotection.substack.com/p/towards-a-common-privacy-api-introducing)\n"
    "**PIISA** (Personally Identifiable Information Standard Architecture) is a set of tools to detect and remediate\n"
    "PII within large scale language data. It uses best of breed tools like [🤗 transformers](https://huggingface.co/docs/transformers/index) libraries,\n"
    "[spaCy](https://spacy.io/), regular expressions, [Faker](https://faker.readthedocs.io/en/master/) and [Presidio](https://microsoft.github.io/presidio/)\n"
    "to leverage best practices for effectively managing data privacy in accordance with your privacy policies.\n"
    "Important links:\n"
    "1. [PIISA API docs](https://github.com/piisa/piisa)\n"
    "2. [Blog](https://privacyprotection.substack.com/)\n"
    "3. [LinkedIn](https://www.linkedin.com/company/piisa/)\n"
    "This demo uses the multi-lingual [wikineural model](https://huggingface.co/Babelscape/wikineural-multilingual-ner) from [Babelscape](https://huggingface.co/Babelscape).\n"
    "### ▵ We're looking for any feedback and/or suggestions, so please open a new thread in the Discussions tab ▵\n"
)
def change_language(language_selection):
    """Switch the processing language to the one picked in the dropdown.

    Looks up the code for *language_selection* in ``language_choices``,
    stores it in the module-level ``language_code`` and shows an info
    notification naming the selection. An unknown name raises ``KeyError``
    and leaves the current language untouched.

    NOTE(review): ``language_code`` is a module-level global, so the choice
    is presumably shared by every session served by this process - confirm.
    """
    global language_code
    selected_code = language_choices[language_selection]
    language_code = selected_code
    gr.Info(f"{language_selection} selected")
def process(text, policy):
    """Transform the PII found in *text* according to *policy*.

    Args:
        text: Raw text taken from the input textbox.
        policy: Name of the transformation policy; it is lower-cased before
            use, so a button label such as "Annotate" is accepted.

    Returns:
        The transformed text, or "" when there is no text to process.
    """
    # Nothing to do for missing, empty or whitespace-only input: warn the
    # user and skip building a processor altogether.
    if text is None or not text.strip():
        print("Empty text field")
        gr.Warning("No text present")
        return ""

    # Create the processor for the currently selected language and the
    # requested policy.
    # Custom config to prevent loading of the Presidio plugin
    proc = PiiTextProcessor(
        lang=language_code, default_policy=policy.lower(), config="config.json"
    )

    # Process the text buffer and hand back the transformed buffer
    return proc(text)
def get_full_example(idx):
    """Return the complete example text stored at position *idx*.

    *idx* indexes the module-level ``examples`` list; the examples dropdown
    supplies it as the position of the selected (shortened) label.
    """
    full_text = examples[idx]
    return full_text
# Build the demo page: a header row (introduction + logo), a working row
# (input text, language/policy controls, transformed text), an examples
# dropdown and a collapsible help panel; then start the app.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown(value=header_string)
        with gr.Column(scale=0, min_width=100):
            pass  # empty column
        with gr.Column(scale=0, min_width=100):
            logo = gr.Image(
                "image.jpeg",
                height=100,
                width=100,
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                mask_opacity=1.0,
            )

    with gr.Row():
        with gr.Column(scale=2, min_width=400):
            text_original = gr.Textbox(
                label="Original Text",
                lines=13,
                placeholder="Enter the text you would like to analyze, or select from one of the examples below",
            )
        with gr.Column(scale=0, min_width=25):
            pass  # empty column
        with gr.Column(scale=0, min_width=150):
            gr.Markdown(value="""<p style="text-align: center;">Select Language</p>""")
            lang_picker = gr.Dropdown(
                choices=list(language_choices.keys()),
                label="",
                value=list(language_choices.keys())[0],
                type="value",
                container=False,
            )
            lang_picker.select(change_language, inputs=lang_picker, outputs=None)
            gr.Markdown(value="""<p style="text-align: center;">Select Policy</p>""")
            annotate_btn = gr.Button(value="Annotate", variant="primary", size="sm")
            redact_btn = gr.Button(value="Redact", variant="primary", size="sm")
            anonymize_btn = gr.Button(value="Synthetic", variant="primary", size="sm")
            placeholder_btn = gr.Button(
                value="Placeholder", variant="primary", size="sm"
            )
        with gr.Column(scale=0, min_width=25):
            pass  # empty column
        with gr.Column(
            scale=2,
            min_width=400,
        ):
            text_modified = gr.TextArea(
                label="Transformed Text",
                lines=13,
                show_copy_button=True,
                interactive=False,
            )

    # Each policy button is passed as the second input, so process() receives
    # the button's own label ("Annotate", "Redact", "Synthetic",
    # "Placeholder") and lower-cases it into the policy name. Wiring all four
    # buttons the same way avoids creating hidden text components whose only
    # job is to carry a constant.
    for policy_btn in (annotate_btn, redact_btn, anonymize_btn, placeholder_btn):
        policy_btn.click(
            fn=process, inputs=[text_original, policy_btn], outputs=text_modified
        )

    with gr.Row():
        # type="index": the handler receives the position of the selected
        # label, which get_full_example() uses to fetch the full text.
        example_selector = gr.Dropdown(
            examples_truncated, type="index", label="Examples"
        )
        example_selector.select(
            get_full_example, inputs=example_selector, outputs=[text_original]
        )

    with gr.Accordion(label="Help Panel", open=False):
        gr.Markdown(value=policy_help_string)

demo.queue().launch()
|