VED-AGI-1 committed on
Commit
467b40f
·
verified ·
1 Parent(s): f61e31c

Update privacy.py

Browse files
Files changed (1) hide show
  1. privacy.py +63 -12
privacy.py CHANGED
@@ -1,17 +1,68 @@
1
  import re
2
 
3
- # Simple, configurable redaction. Replace with your enterprise DLP when ready.
4
- RE_PATTERNS = {
5
- "MRN": re.compile(r"\b(?:MRN|Med(?:ical)?\s*Record)\s*[:#]?\s*\d{5,10}\b", re.I),
6
- "DOB": re.compile(r"\b(?:DOB|DoB|Birth\s*Date)\s*[:#]?\s*(\d{4}[-/]\d{2}[-/]\d{2}|\d{2}[-/]\d{2}[-/]\d{4})\b", re.I),
7
- "PHONE": re.compile(r"\b(?:\+?\d{1,2}\s*)?(?:\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4})\b"),
8
- "EMAIL": re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"),
9
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  def redact_text(text: str) -> str:
12
- if not text:
 
13
  return text
14
- red = text
15
- for pat in RE_PATTERNS.values():
16
- red = pat.sub("[REDACTED]", red)
17
- return red
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import re
2
 
3
+ # privacy.py
4
+ import re
5
+ from typing import Tuple
6
+
7
# Healthcare-specific PHI patterns.
#
# Each entry is an uncompiled regex source string. The functions in this
# module apply them with re.IGNORECASE, so classes such as [A-Z] and [a-z]
# match either case at match time (e.g. the name pattern also matches
# lower-case words following an honorific).
PHI_PATTERNS = [
    # Names: an honorific followed by one or two words
    r'\b(Mr|Mrs|Ms|Dr|Prof)\.?\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+)?\b',
    # Medical record numbers
    r'\b(MRN|Patient ID|Medical Record)\s*:?\s*\d+\b',
    # Health IDs
    r'\b(Health Card|Insurance ID)\s*:?\s*[A-Z0-9]+\b',
    # Dates of birth
    r'\b(DOB|Date of Birth)\s*:?\s*\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b',
    # Phone numbers (any 3-3-4 digit group, optionally separated)
    r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b',
    # Email addresses. The TLD class is [A-Za-z]; a "|" inside a character
    # class is a literal pipe, not alternation, so it must not appear here.
    r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
    # Ages (when combined with other info)
    r'\b\d+\s*(years old|y\.o\.|yo)\b',
]
24
 
25
def redact_text(text: str) -> str:
    """Return *text* with every PHI pattern match replaced by '[REDACTED]'.

    Each regex in PHI_PATTERNS is applied in list order, case-insensitively,
    to the result of the previous substitution. Values that are not strings
    are handed back unchanged.
    """
    if isinstance(text, str):
        for phi_regex in PHI_PATTERNS:
            text = re.sub(phi_regex, '[REDACTED]', text, flags=re.IGNORECASE)
    return text
35
+
36
def safety_filter(text: str, mode: str = "input") -> Tuple[str, bool, str]:
    """Screen healthcare text for PHI and harmful content.

    Args:
        text: The text to screen. Non-string values are returned untouched
            and never blocked.
        mode: "input" blocks text that contains PHI; any other value is
            treated as output, where PHI is redacted instead of blocked.

    Returns:
        A ``(safe_text, blocked, reason)`` tuple. When ``blocked`` is True,
        ``safe_text`` is the empty string and ``reason`` explains why. When
        output PHI was redacted, ``blocked`` is False and ``reason`` notes
        the redaction. Otherwise ``reason`` is empty.
    """
    if not isinstance(text, str):
        return text, False, ""

    is_input = mode == "input"
    safe_text = text
    reason = ""

    # Check for PHI
    has_phi = any(re.search(pattern, text, re.IGNORECASE) for pattern in PHI_PATTERNS)
    if has_phi:
        if is_input:
            return "", True, "Input contains potential Protected Health Information (PHI). Please remove any personal information."
        # Output: redact, but keep going so the harmful-content check below
        # still applies to text that also contained PHI.
        safe_text = redact_text(text)
        reason = "Output contained PHI which has been redacted."

    # General safety checks. These run against the original text so that a
    # redaction can never hide part of a harmful phrase.
    harmful_patterns = (
        r'\b(self-harm|suicide|kill myself)\b',
        r'\b(medical advice|diagnosis|treatment)\b.*\b(you should|you must)\b',
    )
    for pattern in harmful_patterns:
        if re.search(pattern, text, re.IGNORECASE):
            # Name the side that was actually screened in the reason.
            subject = "Input" if is_input else "Output"
            return "", True, f"{subject} contains potentially harmful content."

    return safe_text, False, reason
65
+
66
def refusal_reply(reason: str) -> str:
    """Generate a refusal message based on the reason.

    Args:
        reason: Explanation of why the request was refused.

    Returns:
        "I cannot process this request because: <reason>" for a non-blank
        reason. If the reason is None, empty, or whitespace only, a generic
        refusal without the dangling "because:" clause.
    """
    reason_text = "" if reason is None else str(reason)
    if not reason_text.strip():
        return "I cannot process this request."
    return f"I cannot process this request because: {reason}"