| from flask import Flask, render_template_string, request, jsonify |
| from flask_cors import CORS |
| from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline |
| import os |
| import sys |
| import threading |
| import time |
|
|
# Flask application object. CORS(app) applies flask-cors with its default
# settings to the whole app.
app = Flask(__name__)
CORS(app)

# Shared model state. load_model_async() writes these from a background
# thread; the request handlers only read them.
model_name = "openai/privacy-filter"  # model identifier passed to from_pretrained
classifier = model_error = model_thread = None  # pipeline / last error text / loader thread
model_loading = False  # True only while load_model_async() is running
|
|
| |
def load_model_async():
    """Load the token-classification pipeline and publish it in ``classifier``.

    Meant to be run on a background thread. ``model_loading`` is True for the
    duration of the call and is always reset to False on exit. On success
    ``classifier`` holds the pipeline and ``model_error`` is cleared; on any
    exception the message is stored in ``model_error`` and the traceback is
    printed, leaving ``classifier`` untouched.
    """
    global classifier, model_loading, model_error

    banner = "=" * 60
    model_loading = True

    print(banner, flush=True)
    print("BACKGROUND: Loading OpenAI Privacy Filter model...", flush=True)
    print(banner, flush=True)

    try:
        print(f"Loading tokenizer and model: {model_name}", flush=True)
        print("This may take 5-10 minutes on first run...", flush=True)

        # Tokenizer and weights share one on-disk download cache.
        hf_cache = {"cache_dir": "/app/.cache/huggingface"}
        tok = AutoTokenizer.from_pretrained(model_name, **hf_cache)
        net = AutoModelForTokenClassification.from_pretrained(model_name, **hf_cache)

        # device=-1 (CPU in the transformers pipeline API); "simple"
        # aggregation makes the pipeline return grouped entity spans.
        classifier = pipeline(
            task="token-classification",
            model=net,
            tokenizer=tok,
            aggregation_strategy="simple",
            device=-1,
        )

        print("✓ Model loaded successfully!", flush=True)
        model_error = None
    except Exception as exc:
        # Top-level boundary of the loader thread: record the failure so the
        # /health endpoint can report it instead of letting the thread die
        # silently.
        model_error = str(exc)
        print(f"✗ ERROR loading model: {exc}", flush=True)
        import traceback
        traceback.print_exc()
    finally:
        model_loading = False
|
|
| |
# Start loading the model on a background thread as soon as the module is
# imported; /health reports the loading state in the meantime.
# daemon=True: the loader never keeps the interpreter alive on shutdown.
model_thread = threading.Thread(daemon=True, target=load_model_async)
model_thread.start()
|
|
| |
| HTML_TEMPLATE = ''' |
| <!DOCTYPE html> |
| <html lang="en"> |
| <head> |
| <meta charset="UTF-8"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| <title>OpenAI Privacy Filter - PII Detection Demo</title> |
| <style> |
| * { box-sizing: border-box; margin: 0; padding: 0; } |
| body { |
| font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif; |
| background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); |
| min-height: 100vh; |
| color: #fff; |
| padding: 20px; |
| } |
| .container { max-width: 900px; margin: 0 auto; } |
| h1 { |
| text-align: center; margin-bottom: 10px; |
| background: linear-gradient(90deg, #00d4ff, #7b2cbf); |
| -webkit-background-clip: text; |
| -webkit-text-fill-color: transparent; |
| font-size: 2.5rem; |
| } |
| .subtitle { text-align: center; color: #8892b0; margin-bottom: 30px; } |
| .card { |
| background: rgba(255,255,255,0.05); |
| border-radius: 12px; |
| padding: 25px; |
| margin-bottom: 20px; |
| backdrop-filter: blur(10px); |
| border: 1px solid rgba(255,255,255,0.1); |
| } |
| textarea { |
| width: 100%; min-height: 150px; padding: 15px; |
| border-radius: 8px; border: 1px solid rgba(255,255,255,0.2); |
| background: rgba(0,0,0,0.3); color: #fff; |
| font-size: 14px; resize: vertical; font-family: monospace; |
| } |
| textarea::placeholder { color: #666; } |
| button { |
| width: 100%; padding: 15px; margin-top: 15px; |
| border: none; border-radius: 8px; |
| background: linear-gradient(90deg, #00d4ff, #7b2cbf); |
| color: #fff; font-size: 16px; font-weight: 600; |
| cursor: pointer; transition: transform 0.2s, box-shadow 0.2s; |
| } |
| button:hover:not(:disabled) { |
| transform: translateY(-2px); |
| box-shadow: 0 5px 25px rgba(0,212,255,0.4); |
| } |
| button:disabled { |
| opacity: 0.6; cursor: not-allowed; |
| background: linear-gradient(90deg, #666, #444); |
| } |
| .results { display: none; } |
| .results.active { display: block; } |
| .result-text { |
| background: rgba(0,0,0,0.3); padding: 20px; |
| border-radius: 8px; font-family: monospace; |
| line-height: 1.8; word-wrap: break-word; |
| white-space: pre-wrap; |
| } |
| .entity { |
| padding: 2px 8px; border-radius: 4px; |
| font-weight: bold; |
| } |
| .entity-private_person { background: rgba(255,107,107,0.3); border: 1px solid #ff6b6b; } |
| .entity-private_email { background: rgba(78,205,196,0.3); border: 1px solid #4ecdc4; } |
| .entity-private_phone { background: rgba(255,209,102,0.3); border: 1px solid #ffd166; } |
| .entity-private_address { background: rgba(6,214,160,0.3); border: 1px solid #06d6a0; } |
| .entity-account_number { background: rgba(239,71,111,0.3); border: 1px solid #ef476f; } |
| .entity-secret { background: rgba(255,0,110,0.3); border: 1px solid #ff006e; } |
| .entity-private_url { background: rgba(131,56,236,0.3); border: 1px solid #8338ec; } |
| .entity-private_date { background: rgba(58,134,255,0.3); border: 1px solid #3a86ff; } |
| .legend { |
| display: flex; flex-wrap: wrap; gap: 10px; |
| margin-top: 15px; justify-content: center; |
| } |
| .legend-item { |
| display: flex; align-items: center; |
| gap: 5px; font-size: 12px; |
| } |
| .legend-color { |
| width: 20px; height: 20px; |
| border-radius: 4px; border: 1px solid; |
| } |
| .details-list { margin-top: 20px; } |
| .detail-item { |
| display: flex; justify-content: space-between; |
| align-items: center; padding: 12px; |
| background: rgba(255,255,255,0.03); |
| border-radius: 6px; margin-bottom: 8px; |
| } |
| .detail-type { font-weight: bold; color: #00d4ff; } |
| .detail-score { font-size: 12px; color: #8892b0; } |
| .error-box { |
| background: rgba(239,71,111,0.2); |
| border: 1px solid #ef476f; |
| padding: 15px; |
| border-radius: 8px; |
| margin-top: 15px; |
| color: #ff6b6b; |
| } |
| .info-box { |
| background: rgba(0,212,255,0.1); |
| border-left: 3px solid #00d4ff; |
| padding: 15px; margin-bottom: 20px; |
| border-radius: 0 8px 8px 0; |
| } |
| .info-box h3 { margin-bottom: 5px; } |
| .info-box ul { margin-left: 20px; color: #8892b0; } |
| .status-indicator { |
| display: inline-block; |
| width: 10px; height: 10px; |
| border-radius: 50%; |
| margin-right: 8px; |
| } |
| .status-ok { background: #06d6a0; } |
| .status-error { background: #ef476f; } |
| .status-loading { background: #ffd166; animation: pulse 1s infinite; } |
| .status-waiting { background: #3a86ff; } |
| @keyframes pulse { |
| 0%, 100% { opacity: 1; } |
| 50% { opacity: 0.3; } |
| } |
| #modelStatus { |
| text-align: center; |
| margin-bottom: 15px; |
| padding: 15px; |
| background: rgba(0,0,0,0.3); |
| border-radius: 8px; |
| font-size: 14px; |
| } |
| .loading-spinner { |
| display: inline-block; |
| width: 20px; height: 20px; |
| border: 3px solid rgba(255,255,255,0.3); |
| border-top-color: #00d4ff; |
| border-radius: 50%; |
| animation: spin 1s linear infinite; |
| margin-right: 10px; |
| vertical-align: middle; |
| } |
| @keyframes spin { |
| to { transform: rotate(360deg); } |
| } |
| .progress-bar { |
| width: 100%; |
| height: 4px; |
| background: rgba(255,255,255,0.1); |
| border-radius: 2px; |
| margin-top: 10px; |
| overflow: hidden; |
| } |
| .progress-fill { |
| height: 100%; |
| background: linear-gradient(90deg, #00d4ff, #7b2cbf); |
| animation: progress 2s ease-in-out infinite; |
| } |
| @keyframes progress { |
| 0% { width: 0%; transform: translateX(-100%); } |
| 50% { width: 70%; transform: translateX(50%); } |
| 100% { width: 0%; transform: translateX(200%); } |
| } |
| </style> |
| </head> |
| <body> |
| <div class="container"> |
| <h1>OpenAI Privacy Filter</h1> |
| <p class="subtitle">PII Detection & Masking Demo using Flask</p> |
| |
| <div id="modelStatus"> |
| <span id="statusIndicator" class="status-indicator status-loading"></span> |
| <span id="statusText">Waiting for server to start...</span> |
| <div class="progress-bar" id="progressBar"> |
| <div class="progress-fill"></div> |
| </div> |
| </div> |
| |
| <div class="info-box"> |
| <h3>Detects 8 Types of PII:</h3> |
| <ul> |
| <li><strong>private_person</strong> - Names and personal identifiers</li> |
| <li><strong>private_email</strong> - Email addresses</li> |
| <li><strong>private_phone</strong> - Phone numbers</li> |
| <li><strong>private_address</strong> - Physical addresses</li> |
| <li><strong>account_number</strong> - Account/ID numbers</li> |
| <li><strong>secret</strong> - Passwords, tokens, credentials</li> |
| <li><strong>private_url</strong> - Personal/private URLs</li> |
| <li><strong>private_date</strong> - Personal dates (birthdays, etc.)</li> |
| </ul> |
| </div> |
| |
| <div class="card"> |
| <textarea id="inputText" placeholder="Enter text with PII here...\n\nExample: My name is Alice Smith and my email is alice.smith@example.com. You can reach me at (555) 123-4567 or visit me at 123 Main Street, New York. My SSN is 123-45-6789."></textarea> |
| <button onclick="analyzeText()" id="analyzeBtn" disabled>Waiting for model...</button> |
| <div id="errorBox" class="error-box" style="display: none;"></div> |
| </div> |
| |
| <div class="card results" id="resultsCard"> |
| <h3 style="margin-bottom: 15px;">Results</h3> |
| <div class="result-text" id="resultDisplay"></div> |
| |
| <div class="legend"> |
| <div class="legend-item"><div class="legend-color entity-private_person"></div> Person</div> |
| <div class="legend-item"><div class="legend-color entity-private_email"></div> Email</div> |
| <div class="legend-item"><div class="legend-color entity-private_phone"></div> Phone</div> |
| <div class="legend-item"><div class="legend-color entity-private_address"></div> Address</div> |
| <div class="legend-item"><div class="legend-color entity-account_number"></div> Account</div> |
| <div class="legend-item"><div class="legend-color entity-secret"></div> Secret</div> |
| <div class="legend-item"><div class="legend-color entity-private_url"></div> URL</div> |
| <div class="legend-item"><div class="legend-color entity-private_date"></div> Date</div> |
| </div> |
| |
| <div class="details-list" id="detailsList"></div> |
| </div> |
| </div> |
| |
| <script> |
// State shared by the /health polling loop and analyzeText().
const maxRetries = 200;          // 200 polls x 5 seconds, roughly 16 minutes
let retryCount = 0;              // polls issued so far; reset once the model is ready
let isModelLoaded = false;       // true after /health reported model_loaded
let statusCheckInterval = null;  // setInterval handle, null while not polling
| |
// Render one of the four UI states: 'connecting', 'loading', 'ready', 'error'.
// `message` is inserted as HTML (callers may pass markup). Any other state
// value leaves the page untouched.
function updateStatus(state, message) {
    const indicatorEl = document.getElementById("statusIndicator");
    const textEl = document.getElementById("statusText");
    const barEl = document.getElementById("progressBar");
    const buttonEl = document.getElementById("analyzeBtn");

    // One row per state: indicator class, status HTML, button lock,
    // button caption, progress-bar display value.
    const views = new Map([
        ["connecting", {
            dot: "status-waiting",
            html: `<span class="loading-spinner"></span>${message}`,
            locked: true,
            caption: "Server is starting up...",
            bar: "block"
        }],
        ["loading", {
            dot: "status-loading",
            html: `<span class="loading-spinner"></span>${message}`,
            locked: true,
            caption: "Model is loading...",
            bar: "block"
        }],
        ["ready", {
            dot: "status-ok",
            html: "✓ " + message,
            locked: false,
            caption: "Detect PII",
            bar: "none"
        }],
        ["error", {
            dot: "status-error",
            html: "✗ " + message,
            locked: true,
            caption: "Model unavailable",
            bar: "none"
        }]
    ]);

    const view = views.get(state);
    if (view === undefined) {
        return;
    }

    indicatorEl.className = "status-indicator " + view.dot;
    textEl.innerHTML = view.html;
    buttonEl.disabled = view.locked;
    buttonEl.textContent = view.caption;
    barEl.style.display = view.bar;
}
| |
// Poll /health and mirror the model state in the UI.
// Called once on page load; while the server is unreachable or the model is
// still loading it re-arms itself every 5 seconds, for at most maxRetries polls.
async function checkModelStatus() {
    retryCount++;

    if (retryCount > maxRetries) {
        clearInterval(statusCheckInterval);
        statusCheckInterval = null;
        // updateStatus inserts the message as HTML, so the reload button is live.
        updateStatus('error', 'Server did not respond. <button onclick="location.reload()">Refresh Page</button>');
        return;
    }

    try {
        const response = await fetch("/health", {
            method: "GET",
            headers: { "Cache-Control": "no-cache" }
        });

        // /health answers 503 *with a JSON body* when model loading has
        // failed, so the body is parsed regardless of the status code.
        // Only a response that is not a health payload (for example a proxy
        // error page) is treated as "server not ready yet".
        let data = null;
        try {
            data = await response.json();
        } catch (parseError) {
            data = null;
        }
        const isHealthPayload = data !== null && typeof data === "object" && "model_loaded" in data;
        if (data === null || (!response.ok && !isHealthPayload)) {
            throw new Error(`HTTP ${response.status}`);
        }

        console.log("Health check response:", data);

        if (data.model_loading) {
            // Still loading
            updateStatus('loading', `Model loading initialized... (5-10 minutes on first run)`);

            if (!statusCheckInterval) {
                statusCheckInterval = setInterval(checkModelStatus, 5000);
            }
            isModelLoaded = false;
        } else if (data.model_loaded) {
            // Model ready
            updateStatus('ready', 'Model loaded and ready');

            if (statusCheckInterval) {
                clearInterval(statusCheckInterval);
                statusCheckInterval = null;
            }
            isModelLoaded = true;
            retryCount = 0;
        } else {
            // Model failed. The reason comes from the server and is rendered
            // through innerHTML below, so it is escaped first.
            const reason = escapeHtml(String(data.error || "Unknown error"));
            updateStatus('error', `Model failed: ${reason}`);

            const errorBox = document.getElementById("errorBox");
            errorBox.style.display = "block";
            errorBox.innerHTML = `<strong>Error:</strong> ${reason}`;

            if (statusCheckInterval) {
                clearInterval(statusCheckInterval);
                statusCheckInterval = null;
            }
            isModelLoaded = false;
        }
    } catch (error) {
        console.error("Health check failed:", error);
        // Server not ready yet, show connecting state
        updateStatus('connecting', `Waiting for server to start... (attempt ${retryCount})`);

        if (!statusCheckInterval) {
            statusCheckInterval = setInterval(checkModelStatus, 5000);
        }
    }
}

// Start checking immediately with connecting state
checkModelStatus();
| |
// Send the textarea content to /analyze and render the detected entities.
// Errors (empty input, HTTP failure, success=false) are shown in #errorBox.
async function analyzeText() {
    const btn = document.getElementById("analyzeBtn");

    // The Ctrl+Enter shortcut calls this function directly and would bypass
    // the button's disabled state. Honour it here so no request is sent
    // while the model is unavailable or another analysis is in flight.
    if (btn.disabled) {
        return;
    }

    const text = document.getElementById("inputText").value;
    const resultsCard = document.getElementById("resultsCard");
    const errorBox = document.getElementById("errorBox");

    if (!text.trim()) {
        errorBox.style.display = "block";
        errorBox.textContent = "Please enter some text first!";
        return;
    }

    btn.disabled = true;
    btn.innerHTML = '<span class="loading-spinner"></span>Analyzing...';
    errorBox.style.display = "none";

    try {
        const response = await fetch("/analyze", {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({ text: text })
        });

        const data = await response.json();

        if (!response.ok || !data.success) {
            throw new Error(data.error || "Server error");
        }

        displayResults(data, text);
        resultsCard.classList.add("active");

    } catch (error) {
        console.error("Error during analysis:", error);
        errorBox.style.display = "block";
        // textContent, not innerHTML: the message may contain server text.
        errorBox.textContent = "Error: " + error.message;
        resultsCard.classList.remove("active");
    } finally {
        // Re-enable only while the model is still reported as loaded.
        if (isModelLoaded) {
            btn.disabled = false;
            btn.textContent = "Detect PII";
        }
    }
}
| |
// Render the /analyze response: the original text with each entity span
// highlighted, followed by a per-entity list with label and score.
// `data.entities` items carry label, text, start, end and score.
function displayResults(data, originalText) {
    // Work on a copy so sorting does not reorder the caller's response object.
    const entities = (data.entities && data.entities.length > 0) ? Array.from(data.entities) : [];
    let html = "";

    if (entities.length > 0) {
        entities.sort((a, b) => a.start - b.start);

        let lastEnd = 0;
        for (const entity of entities) {
            // The label becomes part of a class attribute: keep only
            // characters that are safe in a class token.
            const cssLabel = String(entity.label).replace(/[^A-Za-z0-9_-]/g, "_");
            html += escapeHtml(originalText.slice(lastEnd, entity.start));
            html += `<span class="entity entity-${cssLabel}">${escapeHtml(entity.text)}</span>`;
            lastEnd = entity.end;
        }
        html += escapeHtml(originalText.slice(lastEnd));

        // Label and text both come from the server and go through innerHTML,
        // so both are escaped.
        const detailsHtml = entities.map(e => `
            <div class="detail-item">
                <div>
                    <span class="detail-type">${escapeHtml(String(e.label))}</span>: ${escapeHtml(e.text)}
                </div>
                <div class="detail-score">Score: ${(e.score * 100).toFixed(2)}%</div>
            </div>
        `).join("");
        document.getElementById("detailsList").innerHTML = "<h4 style='margin:20px 0 10px 0;'>Detected Entities:</h4>" + detailsHtml;
    } else {
        // The newline escapes are doubled because this script is embedded in
        // a Python string literal; the browser receives ordinary newlines.
        html = escapeHtml(originalText) + "\\n\\n[No PII detected]";
        document.getElementById("detailsList").innerHTML = "";
    }

    document.getElementById("resultDisplay").innerHTML = html;
}
| |
// Escape `text` for insertion into HTML. The browser's own serializer
// handles the ampersand and angle brackets; quotes are escaped as well so
// the result is also safe inside a quoted attribute value.
function escapeHtml(text) {
    const div = document.createElement("div");
    div.textContent = text;
    return div.innerHTML.replace(/"/g, "&quot;").replace(/'/g, "&#39;");
}
| |
// Cleanup on page unload: stop the /health polling timer if one is active.
window.addEventListener("beforeunload", () => {
    if (statusCheckInterval) {
        clearInterval(statusCheckInterval);
    }
});

// Keyboard shortcut: Ctrl+Enter (Cmd+Enter on macOS, reported as metaKey)
// inside the textarea runs the analysis.
document.addEventListener('DOMContentLoaded', () => {
    document.getElementById('inputText').addEventListener('keydown', (e) => {
        if ((e.ctrlKey || e.metaKey) && e.key === 'Enter') {
            analyzeText();
        }
    });
});
| </script> |
| </body> |
| </html> |
| ''' |
|
|
@app.route('/')
def index():
    """Serve the single-page demo UI rendered from ``HTML_TEMPLATE``."""
    page = render_template_string(HTML_TEMPLATE)
    return page
|
|
@app.route('/health')
def health():
    """Report whether the model is ready, still loading, or failed.

    The JSON body always carries ``status``, ``model_loaded`` and
    ``model_loading``. The ready and loading states use the default status
    code; the failed state adds an ``error`` string and answers 503.
    """
    # Module state is only read here, so no ``global`` declaration is needed.
    if classifier is not None:
        return jsonify(status='healthy', model_loaded=True, model_loading=False)

    if model_loading:
        return jsonify(
            status='loading',
            model_loaded=False,
            model_loading=True,
            message='Model is still loading, please wait...',
        )

    # Neither loaded nor loading: the loader has finished without producing
    # a pipeline. Fall back to a generic reason if it recorded none.
    failure_reason = model_error or 'Model loading failed or thread terminated unexpectedly'
    return jsonify(
        status='unhealthy',
        model_loaded=False,
        model_loading=False,
        error=failure_reason,
    ), 503
|
|
def _entity_to_dict(entity, text):
    """Convert one pipeline result into the JSON shape the page consumes."""
    start = entity.get('start', 0)
    end = entity.get('end', 0)

    # The page replaces text[start:end] with the returned entity text, so
    # prefer the exact input slice. The pipeline's ``word`` is rebuilt from
    # tokens and may not match the input characters (NOTE(review): confirm
    # for this tokenizer); it stays as the fallback when offsets are unusable.
    if isinstance(start, int) and isinstance(end, int) and 0 <= start < end <= len(text):
        span_text = text[start:end]
    else:
        span_text = entity.get('word', '')

    return {
        'label': entity.get('entity_group', entity.get('entity', 'unknown')),
        'text': span_text,
        'start': start,
        'end': end,
        'score': float(entity.get('score', 0)),
    }


@app.route('/analyze', methods=['POST', 'OPTIONS'])
def analyze():
    """Run PII detection on the ``text`` field of a JSON request body.

    Responses (all JSON, each with a boolean ``success``):
      * 200 -- ``entities`` (list of label/text/start/end/score dicts) and
        ``entity_count``; blank text yields an empty list.
      * 400 -- body is missing, not valid JSON, not a JSON object, or
        ``text`` is not a string.
      * 503 -- the model pipeline is not available yet.
      * 500 -- the pipeline raised while processing the text.
    OPTIONS requests get an empty 204.
    """
    if request.method == 'OPTIONS':
        return '', 204

    if classifier is None:
        return jsonify({
            'success': False,
            'error': f'Model not yet loaded. Current status: {"loading" if model_loading else "failed"}. Please wait and refresh in a few minutes.'
        }), 503

    # silent=True: a wrong content type or malformed JSON yields None here
    # instead of raising, so client mistakes are answered with 400, not 500.
    payload = request.get_json(silent=True)
    if not payload:
        return jsonify({'success': False, 'error': 'No JSON data received'}), 400
    if not isinstance(payload, dict):
        return jsonify({'success': False, 'error': 'JSON body must be an object'}), 400

    text = payload.get('text', '')
    if not isinstance(text, str):
        return jsonify({'success': False, 'error': '"text" must be a string'}), 400

    if not text.strip():
        return jsonify({'success': True, 'entities': [], 'entity_count': 0})

    try:
        results = classifier(text)
        entities = [_entity_to_dict(entity, text) for entity in results]
        return jsonify({
            'success': True,
            'entities': entities,
            'entity_count': len(entities)
        })
    except Exception as e:
        # Request boundary: log the traceback and report the failure as JSON
        # so the page can display it.
        print(f"Error during analysis: {e}", flush=True)
        import traceback
        traceback.print_exc()
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500
|
|
if __name__ == '__main__':
    # Listen on all interfaces; the port comes from the PORT environment
    # variable and falls back to 7860.
    listen_port = int(os.getenv('PORT', 7860))
    app.run(host='0.0.0.0', port=listen_port, debug=False, threaded=True)