ash0ts committed on
Commit
7e16d4f
·
1 Parent(s): 778809b

Add regex variant workflow

Browse files
guardrails_genie/guardrails/pii/__init__.py ADDED
File without changes
guardrails_genie/guardrails/pii/regex_pii_guardrail.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Optional, ClassVar
2
+
3
+ import weave
4
+ from pydantic import BaseModel
5
+
6
+ from ...regex_model import RegexModel
7
+ from ..base import Guardrail
8
+
9
+
10
class RegexPIIGuardrailResponse(BaseModel):
    """Structured outcome of a regex-based PII check on a single prompt."""

    # True when at least one PII pattern matched the prompt.
    contains_pii: bool
    # Matched text grouped by PII type name, e.g. {"email": ["a@b.com"]}.
    detected_pii_types: Dict[str, list[str]]
    # True when no pattern matched (the inverse of ``contains_pii`` as
    # populated by ``RegexPIIGuardrail.guard``).
    safe_to_process: bool
    # Human-readable, newline-separated summary of the findings.
    explanation: str
15
+
16
+
17
class RegexPIIGuardrail(Guardrail):
    """Guardrail that looks for PII in a prompt using named regex patterns.

    The patterns are compiled and evaluated by a ``RegexModel``; this class
    supplies a default pattern set and turns the raw match result into a
    ``RegexPIIGuardrailResponse``.
    """

    regex_model: RegexModel
    patterns: Dict[str, str] = {}

    # Built-in patterns, keyed by PII type name. These are heuristics: several
    # of them are purely digit-count based and can overlap with one another.
    DEFAULT_PII_PATTERNS: ClassVar[Dict[str, str]] = {
        "email": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
        "phone_number": r"\b(?:\+?1[-.]?)?\(?(?:[0-9]{3})\)?[-.]?(?:[0-9]{3})[-.]?(?:[0-9]{4})\b",
        "ssn": r"\b\d{3}[-]?\d{2}[-]?\d{4}\b",
        "credit_card": r"\b\d{4}[-.]?\d{4}[-.]?\d{4}[-.]?\d{4}\b",
        "ip_address": r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b",
        "date_of_birth": r"\b\d{2}[-/]\d{2}[-/]\d{4}\b",
        "passport": r"\b[A-Z]{1,2}[0-9]{6,9}\b",
        "drivers_license": r"\b[A-Z]\d{7}\b",
        "bank_account": r"\b\d{8,17}\b",
        "zip_code": r"\b\d{5}(?:[-]\d{4})?\b",
    }

    def __init__(self, use_defaults: bool = True, **kwargs):
        """Build the guardrail and its underlying ``RegexModel``.

        Args:
            use_defaults: When true, start from a copy of
                ``DEFAULT_PII_PATTERNS``; otherwise start from no patterns.
            **kwargs: Only the ``patterns`` key is read. If present and
                non-empty, its entries are merged over the starting set
                (same-named defaults are overridden). Other keys are ignored.
        """
        active_patterns = self.DEFAULT_PII_PATTERNS.copy() if use_defaults else {}

        custom_patterns = kwargs.get("patterns")
        if custom_patterns:
            active_patterns.update(custom_patterns)

        # The base class receives both the ready-made model and the merged
        # pattern mapping it was built from.
        super().__init__(
            regex_model=RegexModel(patterns=active_patterns),
            patterns=active_patterns,
        )

    @weave.op()
    def guard(self, prompt: str, **kwargs) -> RegexPIIGuardrailResponse:
        """
        Scan ``prompt`` with the configured patterns and report any PII found.

        Args:
            prompt: Input text to check for PII

        Returns:
            RegexPIIGuardrailResponse with the matched text per PII type, the
            pass/fail flags, and a textual explanation of the findings
        """
        check_result = self.regex_model.check(prompt)
        hits = check_result.matched_patterns
        misses = check_result.failed_patterns

        # First section: per-type match counts, or a single "nothing found" line.
        if hits:
            summary = ["Found the following PII in the text:"]
            summary.extend(
                f"- {pii_type}: {len(found)} instance(s)"
                for pii_type, found in hits.items()
            )
        else:
            summary = ["No PII detected in the text."]

        # Second section: the pattern names that were tried without a match.
        if misses:
            summary.append("\nChecked but did not find these PII types:")
            summary.extend(f"- {pattern_name}" for pattern_name in misses)

        return RegexPIIGuardrailResponse(
            contains_pii=not check_result.passed,
            detected_pii_types=hits,
            safe_to_process=check_result.passed,
            explanation="\n".join(summary),
        )
guardrails_genie/guardrails/pii/run_regex_model.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from guardrails_genie.guardrails.pii.regex_pii_guardrail import RegexPIIGuardrail
2
+ import weave
3
+
4
def run_regex_model():
    """Run the default regex PII guardrail on a sample prompt and print the result."""
    weave.init("guardrails-genie-pii-regex-model")

    # Sample text containing an email address and a phone number.
    sample_prompt = "Please contact john.doe@email.com or call 123-456-7890"

    # Guardrail backed by the built-in default patterns.
    guardrail = RegexPIIGuardrail(use_defaults=True)
    outcome = guardrail.guard(sample_prompt)
    print(outcome)

    # Expected contents of the printed result:
    # - contains_pii: True
    # - detected_pii_types: {"email": ["john.doe@email.com"], "phone_number": ["123-456-7890"]}
    # - safe_to_process: False
    # - explanation: Detailed explanation of findings


if __name__ == "__main__":
    run_regex_model()
guardrails_genie/regex_model.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict, Optional
2
+ import re
3
+ import weave
4
+ from pydantic import BaseModel
5
+
6
+
7
class RegexResult(BaseModel):
    """Outcome of checking one text against a set of named regex patterns."""

    # True only when none of the patterns matched the text.
    passed: bool
    # Matched text per pattern name; only patterns with at least one match appear.
    matched_patterns: Dict[str, List[str]]
    # Names of the patterns that produced no match.
    failed_patterns: List[str]
11
+
12
+
13
class RegexModel(weave.Model):
    """Checks text against a dictionary of named regular expressions."""

    patterns: Dict[str, str]

    def __init__(self, patterns: Dict[str, str]) -> None:
        r"""
        Initialize RegexModel with a dictionary of patterns.

        Args:
            patterns: Dictionary where key is pattern name and value is regex pattern
                     Example: {"email": r"[^@ \t\r\n]+@[^@ \t\r\n]+\.[^@ \t\r\n]+",
                              "phone": r"\b\d{3}[-.]?\d{3}[-.]?\d{4}\b"}

        Raises:
            re.error: If any of the pattern strings is not a valid regex.
        """
        super().__init__(patterns=patterns)
        # Compile once up front so check() does not recompile on every call.
        self._compiled_patterns = {
            name: re.compile(pattern) for name, pattern in patterns.items()
        }

    @weave.op()
    def check(self, text: str) -> RegexResult:
        """
        Check text against all patterns and return detailed results.

        Args:
            text: Input text to check against patterns

        Returns:
            RegexResult where ``matched_patterns`` maps each matching pattern
            name to the full text of its matches, ``failed_patterns`` lists the
            names that did not match, and ``passed`` is True only when nothing
            matched.
        """
        matches: Dict[str, List[str]] = {}
        failed_patterns: List[str] = []

        for pattern_name, compiled_pattern in self._compiled_patterns.items():
            # Use finditer + group(0) rather than findall: findall returns the
            # capture-group contents (or tuples of them) when a pattern has
            # groups, which would report the wrong text and violate the
            # Dict[str, List[str]] result type.
            found_matches = [m.group(0) for m in compiled_pattern.finditer(text)]
            if found_matches:
                matches[pattern_name] = found_matches
            else:
                failed_patterns.append(pattern_name)

        # Consider it passed only if no patterns matched (no PII found)
        return RegexResult(
            passed=not matches,
            matched_patterns=matches,
            failed_patterns=failed_patterns,
        )

    @weave.op()
    def predict(self, text: str) -> RegexResult:
        """
        Alias for check() to maintain consistency with other models.
        """
        return self.check(text)