File size: 3,044 Bytes
2b45d27 77a9f36 15eeb31 77a9f36 2b45d27 77a9f36 2b45d27 77a9f36 2b45d27 77a9f36 2b45d27 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
import os
from typing import Dict, Any
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
def predict(text: str) -> Dict[str, float]:
    """Score a piece of text with the PII classifier.

    Relies on the module-level ``tokenizer`` and ``model`` objects, which the
    script's entry-point section loads before the interface is launched.

    Args:
        text: Raw input string to classify.

    Returns:
        A mapping from label to score. On success the keys are
        ``"Asking for PII"`` and ``"Giving PII"``; each score is an
        element-wise sigmoid of the corresponding logit, so the two values
        are independent and need not sum to 1. Blank input and failures
        return a single descriptive key with a score of ``0.0``, so every
        value in the mapping is always a float.
    """
    if not text or not text.strip():
        return {"No input provided": 0.0}

    try:
        # Tokenize input
        inputs = tokenizer(
            text,
            return_tensors="pt",
            padding="max_length",
            max_length=512,
            truncation=True,
        )

        # Run inference without building an autograd graph
        with torch.no_grad():
            logits = model(**inputs).logits

        # A single string was tokenized, so squeeze() drops the size-1 batch
        # dimension and leaves one score per label.
        probs = torch.sigmoid(logits).squeeze().tolist()

        return {
            "Asking for PII": float(probs[0]),
            "Giving PII": float(probs[1]),
        }
    except Exception as e:
        # Top-level UI boundary: report the failure instead of raising.
        # The message goes in the key (with a 0.0 score), mirroring the
        # "No input provided" case, because the label output expects numeric
        # confidences and cannot render a string value.
        return {f"Error: {e}": 0.0}
# Sample inputs for the demo; each row holds the value for the single textbox.
examples = [
    [sample_text]
    for sample_text in (
        "Do you have the blue app?",
        "I live at 901 Roosevelt St, Redwood City",
    )
]
if __name__ == "__main__":
    # Script entry point: load the classifier, then serve the Gradio demo.
    # NOTE: `model` and `tokenizer` are deliberately assigned at module level
    # (not inside a helper function) because predict() reads them as globals.

    # Model configuration
    model_id = "Roblox/Roblox-PII-Classifier"

    # Get HF token from Hugging Face Space secrets
    # In Spaces, set HF_TOKEN in Settings > Repository secrets
    HF_TOKEN = os.getenv("HF_TOKEN")

    # Load model and tokenizer
    print(f"Loading model: {model_id}")

    # An unset or empty HF_TOKEN means anonymous access. `token=None` is the
    # default of from_pretrained, so one call covers both cases.
    auth_token = HF_TOKEN or None
    if auth_token:
        # Use token if available (required for private models)
        print("Using HF_TOKEN from environment/secrets")
    else:
        print("No HF_TOKEN found, attempting without authentication...")

    try:
        model = AutoModelForSequenceClassification.from_pretrained(
            model_id, token=auth_token
        )
        tokenizer = AutoTokenizer.from_pretrained(model_id, token=auth_token)
        model.eval()
        print("Model loaded successfully!")
    except Exception as e:
        print(f"Failed to load model: {e}")
        if not HF_TOKEN:
            print("\n⚠️ For private models, you need to set HF_TOKEN as a Space secret:")
            print(" 1. Go to your Space Settings")
            print(" 2. Add a new secret named 'HF_TOKEN'")
            print(" 3. Set your Hugging Face token as the value")
        # Raise SystemExit directly: the bare `exit()` helper is injected by
        # the `site` module and is not guaranteed to exist in every runtime.
        raise SystemExit(1)

    # Create Gradio interface
    demo = gr.Interface(
        fn=predict,
        inputs=gr.Textbox(
            lines=3,
            placeholder="Enter text to analyze for PII content...",
            label="Input Text",
        ),
        outputs=gr.Label(
            num_top_classes=2,
            label="Classification Results",
        ),
        title="PII Detection Demo",
        description="This model detects whether text is asking for or giving personal information (PII).",
        examples=examples,
        flagging_mode="never",
    )

    demo.launch()
|