Spaces:

VED-AGI-1
/

Medica_DecisionSupportAI

Sleeping

App Files Files

xet

Community

VED-AGI-1 committed 28 days ago

Commit

467b40f

verified ·

1 Parent(s): f61e31c

Update privacy.py

Browse files

Files changed (1) hide show

privacy.py +63 -12

privacy.py CHANGED Viewed

@@ -1,17 +1,68 @@
 import re
-# Simple, configurable redaction. Replace with your enterprise DLP when ready.
-RE_PATTERNS = {
-    "MRN": re.compile(r"\b(?:MRN|Med(?:ical)?\s*Record)\s*[:#]?\s*\d{5,10}\b", re.I),
-    "DOB": re.compile(r"\b(?:DOB|DoB|Birth\s*Date)\s*[:#]?\s*(\d{4}[-/]\d{2}[-/]\d{2}|\d{2}[-/]\d{2}[-/]\d{4})\b", re.I),
-    "PHONE": re.compile(r"\b(?:\+?\d{1,2}\s*)?(?:\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4})\b"),
-    "EMAIL": re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"),
-}
 def redact_text(text: str) -> str:
-    if not text:
         return text
-    red = text
-    for pat in RE_PATTERNS.values():
-        red = pat.sub("[REDACTED]", red)
-    return red

 import re
+# privacy.py
+import re
+from typing import Tuple
# Healthcare-specific PHI patterns.
#
# Each entry is an *uncompiled* regex source string; the functions in this
# module apply them with ``re.IGNORECASE`` via the ``flags=`` argument, so the
# entries must stay plain strings (a compiled pattern cannot take ``flags=``).
# The order of the entries is kept stable.
PHI_PATTERNS = [
    # Names introduced by a title (e.g. "Dr. Jane Smith")
    r'\b(Mr|Mrs|Ms|Dr|Prof)\.?\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+)?\b',
    # Medical record numbers
    r'\b(MRN|Patient ID|Medical Record)\s*:?\s*\d+\b',
    # Health IDs
    r'\b(Health Card|Insurance ID)\s*:?\s*[A-Z0-9]+\b',
    # Dates of birth
    r'\b(DOB|Date of Birth)\s*:?\s*\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b',
    # Phone numbers: optional country code, then either a parenthesised area
    # code "(555) " or a bare one "555-", followed by the 3+4 digit number.
    # The "(" alternative cannot be preceded by \b, hence the two branches.
    r'(?:\+?\d{1,2}[-.\s]?)?(?:\(\d{3}\)\s*|\b\d{3}[-.\s]?)\d{3}[-.\s]?\d{4}\b',
    # Email addresses (TLD class is letters only; a "|" inside a character
    # class is a literal pipe, not alternation)
    r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
    # Ages (when combined with other info)
    r'\b\d+\s*(years old|y\.o\.|yo)\b',
]
 def redact_text(text: str) -> str:
+    """Redact potential PHI from text."""
+    if not isinstance(text, str):
         return text
+    redacted = text
+    for pattern in PHI_PATTERNS:
+        redacted = re.sub(pattern, '[REDACTED]', redacted, flags=re.IGNORECASE)
+    return redacted
# General (non-PHI) content checks, compiled once at import time.
# Note: "." does not match newlines here, so the second pattern only fires
# when both phrases appear on the same line.
_HARMFUL_PATTERNS = tuple(
    re.compile(source, re.IGNORECASE)
    for source in (
        r'\b(self-harm|suicide|kill myself)\b',
        r'\b(medical advice|diagnosis|treatment)\b.*\b(you should|you must)\b',
    )
)


def safety_filter(text: str, mode: str = "input") -> Tuple[str, bool, str]:
    """Screen healthcare text for PHI and for potentially harmful content.

    Args:
        text: The text to screen. Non-string values are passed through
            untouched and never blocked.
        mode: ``"input"`` to screen user input; any other value is treated
            as model output.

    Returns:
        A ``(safe_text, blocked, reason)`` tuple:

        * PHI found, input mode: ``("", True, <reason>)``; the text is
          rejected.
        * PHI found, other modes: ``(<redacted text>, False, <reason>)``;
          the text is redacted and let through. The harmful-content check is
          not run in this case.
        * Harmful pattern found: ``("", True, <reason>)``, where the reason
          names the side ("Input" or "Output") that was flagged.
        * Otherwise: ``(text, False, "")``.
    """
    if not isinstance(text, str):
        return text, False, ""

    is_input = mode == "input"

    # Check for PHI
    has_phi = any(re.search(pattern, text, re.IGNORECASE) for pattern in PHI_PATTERNS)
    if has_phi:
        if is_input:
            return "", True, "Input contains potential Protected Health Information (PHI). Please remove any personal information."
        return redact_text(text), False, "Output contained PHI which has been redacted."

    # General safety checks. The reason must name the side that was actually
    # flagged; previously output-mode hits were reported as "Input ...".
    if any(regex.search(text) for regex in _HARMFUL_PATTERNS):
        source = "Input" if is_input else "Output"
        return "", True, f"{source} contains potentially harmful content."

    return text, False, ""
def refusal_reply(reason: str) -> str:
    """Generate a refusal message based on the reason.

    Args:
        reason: Human-readable explanation of why the request was refused.

    Returns:
        ``"I cannot process this request because: <reason>"``, or a generic
        refusal without the trailing clause when *reason* is empty or
        ``None`` (avoids emitting a dangling "because: ").
    """
    if not reason:
        return "I cannot process this request."
    return f"I cannot process this request because: {reason}"