Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification | |
# 1. Load the tokenizer and weights from the Hugging Face repository.
# MODEL_ID is the single place to change if the model is moved or renamed.
MODEL_ID = "Negative-Star-Innovators/MiniLM-L6-finetuned-pii-detection-v2"

print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)

# Wrap model + tokenizer in a token-classification pipeline.
# aggregation_strategy="simple" is what makes each result expose the
# `entity_group`, `start` and `end` keys that redact_pii reads.
pii_pipeline = pipeline(
    task="token-classification",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
| # 2. Define the redaction function | |
def redact_pii(text):
    """Replace every PII span detected in *text* with a ``[REDACTED <LABEL>]`` tag.

    Args:
        text: The raw input string. ``None`` and whitespace-only input are
            treated as empty.

    Returns:
        ``""`` for empty input, the unchanged text when the model reports no
        entities, otherwise the text with each detected span replaced by a
        tag built from the upper-cased ``entity_group`` label.
    """
    # Guard against None as well as blank input: calling .strip() on None
    # would raise AttributeError.
    if not text or not text.strip():
        return ""

    # Run the model on the input text.
    entities = pii_pipeline(text)

    # If no PII is found, return the original text.
    if not entities:
        return text

    # Walk the spans left to right and assemble the output from slices of the
    # ORIGINAL text, so offsets never go stale and no re-slicing of a growing
    # string is needed.
    pieces = []
    cursor = 0
    for entity in sorted(entities, key=lambda item: (item["start"], item["end"])):
        start, end = entity["start"], entity["end"]
        if start < cursor:
            # Overlaps a span that is already redacted: swallow the remainder
            # under the previous tag so no sensitive characters leak through.
            cursor = max(cursor, end)
            continue
        pieces.append(text[cursor:start])
        # Replace the sensitive text with a clean [REDACTED LABEL] tag.
        pieces.append(f"[REDACTED {entity['entity_group'].upper()}]")
        cursor = end
    pieces.append(text[cursor:])
    return "".join(pieces)
# 3. Build the Gradio User Interface
# We define the input box, the output box, and some default examples.
demo = gr.Interface(
    fn=redact_pii,
    inputs=gr.Textbox(
        lines=5,
        label="Input Text",
        placeholder="Paste text containing sensitive data (names, emails, routing numbers) here...",
    ),
    outputs=gr.Textbox(
        lines=5,
        label="Redacted Output",
    ),
    # Emoji are spelled as escape sequences so they survive any re-encoding
    # of this file: U+1F6E1 U+FE0F is the shield, U+1F4E7 is the e-mail symbol.
    title="\U0001F6E1\uFE0F Secure PII Redaction Playground",
    description=(
        "Test our highly efficient (90MB) PII detection model that is capable of running locally on your device. "
        "It quickly scrubs Personally Identifiable Information entirely on CPU, making it perfect "
        "for sanitizing data before sending it to third-party cloud LLMs and other parties."
    ),
    article=(
        "\U0001F4E7 **Please reach out if you have a question or feedback. We also do custom projects, consulting, freelance and collaboration:** [thieves@negativestarinnovators.com](mailto:thieves@negativestarinnovators.com)"
    ),
    examples=[
        ["John Doe's routing number is 123456789 and his email is john.doe@email.com."],
        ["Please update the shipping address for Jane Smith to 123 Secure Lane. Her phone number is 555-0198."],
        ["The patient, Michael Johnson, was born on 10/12/1985. His SSN is 000-11-2222."],
    ],
    # Turns off the "Flag" button since we don't need to collect user data.
    # NOTE(review): older Gradio releases name this option `allow_flagging`;
    # confirm the installed Gradio version accepts `flagging_mode`.
    flagging_mode="never",
)

# 4. Launch the app (only when run as a script, not when imported)
if __name__ == "__main__":
    demo.launch()