Anish Shah committed on
Commit
13d2f14
·
unverified ·
2 Parent(s): 8382f82 06b5c11

Merge pull request #6 from soumik12345/feat/pii-banned-words

Browse files
Files changed (21) hide show
  1. .gitignore +2 -1
  2. application_pages/chat_app.py +28 -0
  3. guardrails_genie/guardrails/ReadMe.md +136 -0
  4. guardrails_genie/guardrails/__init__.py +10 -0
  5. guardrails_genie/guardrails/entity_recognition/__init__.py +10 -0
  6. guardrails_genie/guardrails/entity_recognition/banned_terms_examples/banned_term_benchmark.py +0 -0
  7. guardrails_genie/guardrails/entity_recognition/banned_terms_examples/banned_term_examples.py +178 -0
  8. guardrails_genie/guardrails/entity_recognition/banned_terms_examples/run_llm_judge.py +47 -0
  9. guardrails_genie/guardrails/entity_recognition/banned_terms_examples/run_regex_model.py +46 -0
  10. guardrails_genie/guardrails/entity_recognition/llm_judge_entity_recognition_guardrail.py +166 -0
  11. guardrails_genie/guardrails/entity_recognition/pii_examples/pii_benchmark.py +215 -0
  12. guardrails_genie/guardrails/entity_recognition/pii_examples/pii_test_examples.py +150 -0
  13. guardrails_genie/guardrails/entity_recognition/pii_examples/run_presidio_model.py +42 -0
  14. guardrails_genie/guardrails/entity_recognition/pii_examples/run_regex_model.py +42 -0
  15. guardrails_genie/guardrails/entity_recognition/pii_examples/run_transformers.py +43 -0
  16. guardrails_genie/guardrails/entity_recognition/presidio_entity_recognition_guardrail.py +191 -0
  17. guardrails_genie/guardrails/entity_recognition/regex_entity_recognition_guardrail.py +138 -0
  18. guardrails_genie/guardrails/entity_recognition/transformers_entity_recognition_guardrail.py +190 -0
  19. guardrails_genie/guardrails/manager.py +7 -4
  20. guardrails_genie/regex_model.py +65 -0
  21. pyproject.toml +2 -0
.gitignore CHANGED
@@ -168,4 +168,5 @@ temp.txt
168
  **.csv
169
  binary-classifier/
170
  wandb/
171
- artifacts/
 
 
168
  **.csv
169
  binary-classifier/
170
  wandb/
171
+ artifacts/
172
+ evaluation_results/
application_pages/chat_app.py CHANGED
@@ -61,6 +61,34 @@ def initialize_guardrails():
61
  guardrail_name,
62
  )(model_name=classifier_model_name)
63
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  st.session_state.guardrails_manager = GuardrailManager(
65
  guardrails=st.session_state.guardrails
66
  )
 
61
  guardrail_name,
62
  )(model_name=classifier_model_name)
63
  )
64
+ elif guardrail_name == "PresidioEntityRecognitionGuardrail":
65
+ st.session_state.guardrails.append(
66
+ getattr(
67
+ importlib.import_module("guardrails_genie.guardrails"),
68
+ guardrail_name,
69
+ )()
70
+ )
71
+ elif guardrail_name == "RegexEntityRecognitionGuardrail":
72
+ st.session_state.guardrails.append(
73
+ getattr(
74
+ importlib.import_module("guardrails_genie.guardrails"),
75
+ guardrail_name,
76
+ )()
77
+ )
78
+ elif guardrail_name == "TransformersEntityRecognitionGuardrail":
79
+ st.session_state.guardrails.append(
80
+ getattr(
81
+ importlib.import_module("guardrails_genie.guardrails"),
82
+ guardrail_name,
83
+ )()
84
+ )
85
+ elif guardrail_name == "RestrictedTermsJudge":
86
+ st.session_state.guardrails.append(
87
+ getattr(
88
+ importlib.import_module("guardrails_genie.guardrails"),
89
+ guardrail_name,
90
+ )()
91
+ )
92
  st.session_state.guardrails_manager = GuardrailManager(
93
  guardrails=st.session_state.guardrails
94
  )
guardrails_genie/guardrails/ReadMe.md ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Entity Recognition Guardrails
2
+
3
+ A collection of guardrails for detecting and anonymizing various types of entities in text, including PII (Personally Identifiable Information), restricted terms, and custom entities.
4
+
5
+ ## Available Guardrails
6
+
7
+ ### 1. Regex Entity Recognition
8
+ Simple pattern-based entity detection using regular expressions.
9
+
10
+ ```python
11
+ from guardrails_genie.guardrails.entity_recognition import RegexEntityRecognitionGuardrail
12
+
13
+ # Initialize with default PII patterns
14
+ guardrail = RegexEntityRecognitionGuardrail(should_anonymize=True)
15
+
16
+ # Or with custom patterns
17
+ custom_patterns = {
18
+ "employee_id": r"EMP\d{6}",
19
+ "project_code": r"PRJ-[A-Z]{2}-\d{4}"
20
+ }
21
+ guardrail = RegexEntityRecognitionGuardrail(patterns=custom_patterns, should_anonymize=True)
22
+ ```
23
+
24
+ ### 2. Presidio Entity Recognition
25
+ Advanced entity detection using Microsoft's Presidio analyzer.
26
+
27
+ ```python
28
+ from guardrails_genie.guardrails.entity_recognition import PresidioEntityRecognitionGuardrail
29
+
30
+ # Initialize with default entities
31
+ guardrail = PresidioEntityRecognitionGuardrail(should_anonymize=True)
32
+
33
+ # Or with specific entities
34
+ selected_entities = ["CREDIT_CARD", "US_SSN", "EMAIL_ADDRESS"]
35
+ guardrail = PresidioEntityRecognitionGuardrail(
36
+ selected_entities=selected_entities,
37
+ should_anonymize=True
38
+ )
39
+ ```
40
+
41
+ ### 3. Transformers Entity Recognition
42
+ Entity detection using transformer-based models.
43
+
44
+ ```python
45
+ from guardrails_genie.guardrails.entity_recognition import TransformersEntityRecognitionGuardrail
46
+
47
+ # Initialize with default model
48
+ guardrail = TransformersEntityRecognitionGuardrail(should_anonymize=True)
49
+
50
+ # Or with specific model and entities
51
+ guardrail = TransformersEntityRecognitionGuardrail(
52
+ model_name="iiiorg/piiranha-v1-detect-personal-information",
53
+ selected_entities=["GIVENNAME", "SURNAME", "EMAIL"],
54
+ should_anonymize=True
55
+ )
56
+ ```
57
+
58
+ ### 4. LLM Judge for Restricted Terms
59
+ Advanced detection of restricted terms, competitor mentions, and brand protection using LLMs.
60
+
61
+ ```python
62
+ from guardrails_genie.guardrails.entity_recognition import RestrictedTermsJudge
63
+
64
+ # Initialize with OpenAI model
65
+ guardrail = RestrictedTermsJudge(should_anonymize=True)
66
+
67
+ # Check for specific terms
68
+ result = guardrail.guard(
69
+ text="Let's implement features like Salesforce",
70
+ custom_terms=["Salesforce", "Oracle", "AWS"]
71
+ )
72
+ ```
73
+
74
+ ## Usage
75
+
76
+ All guardrails follow a consistent interface:
77
+
78
+ ```python
79
+ # Initialize a guardrail
80
+ guardrail = RegexEntityRecognitionGuardrail(should_anonymize=True)
81
+
82
+ # Check text for entities
83
+ result = guardrail.guard("Hello, my email is john@example.com")
84
+
85
+ # Access results
86
+ print(f"Contains entities: {result.contains_entities}")
87
+ print(f"Detected entities: {result.detected_entities}")
88
+ print(f"Explanation: {result.explanation}")
89
+ print(f"Anonymized text: {result.anonymized_text}")
90
+ ```
91
+
92
+ ## Evaluation Tools
93
+
94
+ The module includes comprehensive evaluation tools and test cases:
95
+
96
+ - `pii_examples/`: Test cases for PII detection
97
+ - `banned_terms_examples/`: Test cases for restricted terms
98
+ - Benchmark scripts for evaluating model performance
99
+
100
+ ### Running Evaluations
101
+
102
+ ```python
103
+ # PII Detection Benchmark
104
+ from guardrails_genie.guardrails.entity_recognition.pii_examples.pii_benchmark import main
105
+ main()
106
+
107
+ # (TODO): Restricted Terms Testing
108
+ from guardrails_genie.guardrails.entity_recognition.banned_terms_examples.banned_term_benchmark import main
109
+ main()
110
+ ```
111
+
112
+ ## Features
113
+
114
+ - Entity detection and anonymization
115
+ - Support for multiple detection methods (regex, Presidio, transformers, LLMs)
116
+ - Customizable entity types and patterns
117
+ - Detailed explanations of detected entities
118
+ - Comprehensive evaluation framework
119
+ - Support for custom terms and patterns
120
+ - Batch processing capabilities
121
+ - Performance metrics and benchmarking
122
+
123
+ ## Response Format
124
+
125
+ All guardrails return responses with the following structure:
126
+
127
+ ```python
128
+ {
129
+ "contains_entities": bool,
130
+ "detected_entities": {
131
+ "entity_type": ["detected_value_1", "detected_value_2"]
132
+ },
133
+ "explanation": str,
134
+ "anonymized_text": Optional[str]
135
+ }
136
+ ```
guardrails_genie/guardrails/__init__.py CHANGED
@@ -2,10 +2,20 @@ from .injection import (
2
  PromptInjectionClassifierGuardrail,
3
  PromptInjectionSurveyGuardrail,
4
  )
 
 
 
 
 
 
5
  from .manager import GuardrailManager
6
 
7
  __all__ = [
8
  "PromptInjectionSurveyGuardrail",
9
  "PromptInjectionClassifierGuardrail",
 
 
 
 
10
  "GuardrailManager",
11
  ]
 
2
  PromptInjectionClassifierGuardrail,
3
  PromptInjectionSurveyGuardrail,
4
  )
5
+ from .entity_recognition import (
6
+ PresidioEntityRecognitionGuardrail,
7
+ RegexEntityRecognitionGuardrail,
8
+ TransformersEntityRecognitionGuardrail,
9
+ RestrictedTermsJudge,
10
+ )
11
  from .manager import GuardrailManager
12
 
13
  __all__ = [
14
  "PromptInjectionSurveyGuardrail",
15
  "PromptInjectionClassifierGuardrail",
16
+ "PresidioEntityRecognitionGuardrail",
17
+ "RegexEntityRecognitionGuardrail",
18
+ "TransformersEntityRecognitionGuardrail",
19
+ "RestrictedTermsJudge",
20
  "GuardrailManager",
21
  ]
guardrails_genie/guardrails/entity_recognition/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from .presidio_entity_recognition_guardrail import PresidioEntityRecognitionGuardrail
2
+ from .regex_entity_recognition_guardrail import RegexEntityRecognitionGuardrail
3
+ from .transformers_entity_recognition_guardrail import TransformersEntityRecognitionGuardrail
4
+ from .llm_judge_entity_recognition_guardrail import RestrictedTermsJudge
5
+ __all__ = [
6
+ "PresidioEntityRecognitionGuardrail",
7
+ "RegexEntityRecognitionGuardrail",
8
+ "TransformersEntityRecognitionGuardrail",
9
+ "RestrictedTermsJudge"
10
+ ]
guardrails_genie/guardrails/entity_recognition/banned_terms_examples/banned_term_benchmark.py ADDED
File without changes
guardrails_genie/guardrails/entity_recognition/banned_terms_examples/banned_term_examples.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Collection of restricted terms test examples with expected outcomes for entity recognition testing.
Focuses on banned terms, competitor mentions, and brand protection scenarios.
"""

# Each entry is consumed by run_test_case() and has the shape:
#   description       -- label printed alongside the test result
#   input_text        -- raw text handed to guardrail.guard()
#   custom_terms      -- restricted terms the guardrail is asked to look for
#   expected_entities -- mapping of restricted term -> list of surface forms
#                        the guardrail is expected to report for that term
RESTRICTED_TERMS_EXAMPLES = [
    {
        "description": "Competitor Product Discussion",
        "input_text": """
        I think we should implement features similar to Salesforce's Einstein AI
        and Oracle's Cloud Infrastructure. Maybe we could also look at how
        AWS handles their lambda functions.
        """,
        "custom_terms": ["Salesforce", "Oracle", "AWS", "Einstein AI", "Cloud Infrastructure", "lambda"],
        "expected_entities": {
            "Salesforce": ["Salesforce"],
            "Oracle": ["Oracle"],
            "AWS": ["AWS"],
            "Einstein AI": ["Einstein AI"],
            "Cloud Infrastructure": ["Cloud Infrastructure"],
            "lambda": ["lambda"]
        }
    },
    {
        "description": "Inappropriate Language in Support Ticket",
        "input_text": """
        This damn product keeps crashing! What the hell is wrong with your
        stupid service? I've wasted so much freaking time on this crap.
        """,
        "custom_terms": ["damn", "hell", "stupid", "crap"],
        "expected_entities": {
            "damn": ["damn"],
            "hell": ["hell"],
            "stupid": ["stupid"],
            "crap": ["crap"]
        }
    },
    {
        "description": "Confidential Project Names",
        "input_text": """
        Project Titan's launch date has been moved up. We should coordinate
        with Project Phoenix team and the Blue Dragon initiative for resource allocation.
        """,
        "custom_terms": ["Project Titan", "Project Phoenix", "Blue Dragon"],
        "expected_entities": {
            "Project Titan": ["Project Titan"],
            "Project Phoenix": ["Project Phoenix"],
            "Blue Dragon": ["Blue Dragon"]
        }
    }
]

# Edge cases and special formats
# NOTE(review): several of these expectations (stock tickers, l33t speak,
# typos) are intentionally hard; presumably only the LLM judge -- not the
# plain regex guardrail -- can satisfy them. Confirm against the benchmark.
EDGE_CASE_EXAMPLES = [
    {
        "description": "Common Corporate Abbreviations and Stock Symbols",
        "input_text": """
        MSFT's Azure and O365 platform is gaining market share.
        Have you seen what GOOGL/GOOG and FB/META are doing with their AI?
        CRM (Salesforce) and ORCL (Oracle) have interesting features too.
        """,
        "custom_terms": ["Microsoft", "Google", "Meta", "Facebook", "Salesforce", "Oracle"],
        "expected_entities": {
            "Microsoft": ["MSFT"],
            "Google": ["GOOGL", "GOOG"],
            "Meta": ["META"],
            "Facebook": ["FB"],
            "Salesforce": ["CRM", "Salesforce"],
            "Oracle": ["ORCL"]
        }
    },
    {
        "description": "L33t Speak and Intentional Obfuscation",
        "input_text": """
        S4l3sf0rc3 is better than 0r4cl3!
        M1cr0$oft and G00gl3 are the main competitors.
        Let's check F8book and Met@ too.
        """,
        "custom_terms": ["Salesforce", "Oracle", "Microsoft", "Google", "Facebook", "Meta"],
        "expected_entities": {
            "Salesforce": ["S4l3sf0rc3"],
            "Oracle": ["0r4cl3"],
            "Microsoft": ["M1cr0$oft"],
            "Google": ["G00gl3"],
            "Facebook": ["F8book"],
            "Meta": ["Met@"]
        }
    },
    {
        "description": "Case Variations and Partial Matches",
        "input_text": """
        salesFORCE and ORACLE are competitors.
        MicroSoft and google are too.
        Have you tried micro-soft or Google_Cloud?
        """,
        "custom_terms": ["Microsoft", "Google", "Salesforce", "Oracle"],
        "expected_entities": {
            "Microsoft": ["MicroSoft", "micro-soft"],
            "Google": ["google", "Google_Cloud"],
            "Salesforce": ["salesFORCE"],
            "Oracle": ["ORACLE"]
        }
    },
    {
        "description": "Common Misspellings and Typos",
        "input_text": """
        Microsft and Microsooft are common typos.
        Goggle, Googel, and Gooogle are search related.
        Salezforce and Oracel need checking too.
        """,
        "custom_terms": ["Microsoft", "Google", "Salesforce", "Oracle"],
        "expected_entities": {
            "Microsoft": ["Microsft", "Microsooft"],
            "Google": ["Goggle", "Googel", "Gooogle"],
            "Salesforce": ["Salezforce"],
            "Oracle": ["Oracel"]
        }
    },
    {
        "description": "Mixed Variations and Context",
        "input_text": """
        The M$ cloud competes with AWS (Amazon Web Services).
        FB/Meta's social platform and GOOGL's search dominate.
        SF.com and Oracle-DB are industry standards.
        """,
        "custom_terms": ["Microsoft", "Amazon Web Services", "Facebook", "Meta", "Google", "Salesforce", "Oracle"],
        "expected_entities": {
            "Microsoft": ["M$"],
            "Amazon Web Services": ["AWS"],
            "Facebook": ["FB"],
            "Meta": ["Meta"],
            "Google": ["GOOGL"],
            "Salesforce": ["SF.com"],
            "Oracle": ["Oracle-DB"]
        }
    }
]
138
+
139
def validate_entities(detected: dict, expected: dict) -> bool:
    """Return True when *detected* names exactly the expected entity types
    and each type carries the same set of surface forms as *expected*."""
    if set(detected) != set(expected):
        return False
    for entity_type in expected:
        if set(detected[entity_type]) != set(expected[entity_type]):
            return False
    return True


def run_test_case(guardrail, test_case, test_type="Main"):
    """Run one test case against *guardrail*, print a report, and return
    True when the detected entities match the expectation exactly."""
    print(f"\n{test_type} Test Case: {test_case['description']}")
    print("-" * 50)

    outcome = guardrail.guard(
        test_case["input_text"], custom_terms=test_case["custom_terms"]
    )
    wanted = test_case["expected_entities"]

    # Exact-match comparison of detected vs. expected entities.
    passed = validate_entities(outcome.detected_entities, wanted)

    print(f"Test Status: {'✓ PASS' if passed else '✗ FAIL'}")
    print(f"Contains Restricted Terms: {outcome.contains_entities}")

    if not passed:
        # On failure, print a per-entity-type diff to aid debugging.
        print("\nEntity Comparison:")
        for entity_type in set(outcome.detected_entities) | set(wanted):
            found = set(outcome.detected_entities.get(entity_type, []))
            target = set(wanted.get(entity_type, []))
            print(f"\nEntity Type: {entity_type}")
            print(f"  Expected: {sorted(target)}")
            print(f"  Detected: {sorted(found)}")
            if found != target:
                print(f"  Missing: {sorted(target - found)}")
                print(f"  Extra: {sorted(found - target)}")

    if outcome.anonymized_text:
        print(f"\nAnonymized Text:\n{outcome.anonymized_text}")

    return passed
guardrails_genie/guardrails/entity_recognition/banned_terms_examples/run_llm_judge.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from guardrails_genie.guardrails.entity_recognition.llm_judge_entity_recognition_guardrail import RestrictedTermsJudge
2
+ from guardrails_genie.guardrails.entity_recognition.banned_terms_examples.banned_term_examples import (
3
+ RESTRICTED_TERMS_EXAMPLES,
4
+ EDGE_CASE_EXAMPLES,
5
+ run_test_case
6
+ )
7
+ from guardrails_genie.llm import OpenAIModel
8
+ import weave
9
+
10
def test_restricted_terms_detection():
    """Test restricted terms detection scenarios using predefined test cases"""
    weave.init("guardrails-genie-restricted-terms-llm-judge")

    # LLM-backed judge with anonymization enabled.
    judge = RestrictedTermsJudge(
        should_anonymize=True,
        llm_model=OpenAIModel()
    )

    # Main restricted-terms scenarios.
    print("\nRunning Main Restricted Terms Tests")
    print("=" * 80)
    passed = sum(
        1 for case in RESTRICTED_TERMS_EXAMPLES if run_test_case(judge, case)
    )

    # Harder edge cases (obfuscation, typos, tickers).
    print("\nRunning Edge Cases")
    print("=" * 80)
    passed += sum(
        1 for case in EDGE_CASE_EXAMPLES if run_test_case(judge, case, "Edge")
    )

    total = len(RESTRICTED_TERMS_EXAMPLES) + len(EDGE_CASE_EXAMPLES)

    print("\nTest Summary")
    print("=" * 80)
    print(f"Total Tests: {total}")
    print(f"Passed: {passed}")
    print(f"Failed: {total - passed}")
    print(f"Success Rate: {(passed/total)*100:.1f}%")

if __name__ == "__main__":
    test_restricted_terms_detection()
guardrails_genie/guardrails/entity_recognition/banned_terms_examples/run_regex_model.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from guardrails_genie.guardrails.entity_recognition.regex_entity_recognition_guardrail import RegexEntityRecognitionGuardrail
2
+ from guardrails_genie.guardrails.entity_recognition.banned_terms_examples.banned_term_examples import (
3
+ RESTRICTED_TERMS_EXAMPLES,
4
+ EDGE_CASE_EXAMPLES,
5
+ run_test_case
6
+ )
7
+ import weave
8
+
9
+ def test_restricted_terms_detection():
10
+ """Test restricted terms detection scenarios using predefined test cases"""
11
+ weave.init("guardrails-genie-restricted-terms-regex-model")
12
+
13
+ # Create the guardrail with anonymization enabled
14
+ regex_guardrail = RegexEntityRecognitionGuardrail(
15
+ use_defaults=False, # Don't use default PII patterns
16
+ should_anonymize=True
17
+ )
18
+
19
+ # Test statistics
20
+ total_tests = len(RESTRICTED_TERMS_EXAMPLES) + len(EDGE_CASE_EXAMPLES)
21
+ passed_tests = 0
22
+
23
+ # Test main restricted terms examples
24
+ print("\nRunning Main Restricted Terms Tests")
25
+ print("=" * 80)
26
+ for test_case in RESTRICTED_TERMS_EXAMPLES:
27
+ if run_test_case(regex_guardrail, test_case):
28
+ passed_tests += 1
29
+
30
+ # Test edge cases
31
+ print("\nRunning Edge Cases")
32
+ print("=" * 80)
33
+ for test_case in EDGE_CASE_EXAMPLES:
34
+ if run_test_case(regex_guardrail, test_case, "Edge"):
35
+ passed_tests += 1
36
+
37
+ # Print summary
38
+ print("\nTest Summary")
39
+ print("=" * 80)
40
+ print(f"Total Tests: {total_tests}")
41
+ print(f"Passed: {passed_tests}")
42
+ print(f"Failed: {total_tests - passed_tests}")
43
+ print(f"Success Rate: {(passed_tests/total_tests)*100:.1f}%")
44
+
45
+ if __name__ == "__main__":
46
+ test_restricted_terms_detection()
guardrails_genie/guardrails/entity_recognition/llm_judge_entity_recognition_guardrail.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Optional
2
+ import weave
3
+ from pydantic import BaseModel, Field
4
+ from typing_extensions import Annotated
5
+
6
+ from ...llm import OpenAIModel
7
+ from ..base import Guardrail
8
+ import instructor
9
+
10
+
11
class TermMatch(BaseModel):
    """Represents a matched term and its variations.

    One instance per detected occurrence: links the caller-supplied
    restricted term to the actual text span the LLM judged to match it.
    """
    # The restricted term from the caller-supplied list that was matched.
    original_term: str
    # The exact text in the analyzed input that triggered the match.
    matched_text: str
    match_type: str = Field(
        description="Type of match: EXACT, MISSPELLING, ABBREVIATION, or VARIANT"
    )
    explanation: str = Field(
        description="Explanation of why this is considered a match"
    )
21
+
22
+
23
class RestrictedTermsAnalysis(BaseModel):
    """Analysis result for restricted terms detection.

    Structured output parsed directly from the LLM response
    (used as ``response_format`` in RestrictedTermsJudge.predict).
    """
    contains_restricted_terms: bool = Field(
        description="Whether any restricted terms were detected"
    )
    detected_matches: List[TermMatch] = Field(
        default_factory=list,
        description="List of detected term matches with their variations"
    )
    explanation: str = Field(
        description="Detailed explanation of the analysis"
    )
    anonymized_text: Optional[str] = Field(
        default=None,
        description="Text with restricted terms replaced with category tags"
    )

    @property
    def safe(self) -> bool:
        """True when no restricted terms were detected."""
        return not self.contains_restricted_terms
43
+
44
+
45
class RestrictedTermsRecognitionResponse(BaseModel):
    """Public result returned by RestrictedTermsJudge.guard.

    Mirrors the response shape of the other entity-recognition guardrails
    (contains_entities / detected_entities / explanation / anonymized_text)
    so callers can treat all guardrails uniformly.
    """
    # True when at least one restricted term (or variant) was found.
    contains_entities: bool
    # Mapping of original restricted term -> matched text spans.
    detected_entities: Dict[str, List[str]]
    # Human-readable summary of the findings.
    explanation: str
    # Input text with matches redacted; None when anonymization is off
    # or nothing was detected.
    anonymized_text: Optional[str] = None

    @property
    def safe(self) -> bool:
        """True when no restricted terms were detected."""
        return not self.contains_entities
54
+
55
+
56
# Fallback restricted terms used when guard() is called without an explicit
# list. Kept as a module-level constant so the method signature does not
# carry a mutable default argument.
# TODO: Remove default custom_terms (callers should always supply their own).
_DEFAULT_CUSTOM_TERMS = [
    "Microsoft", "Amazon Web Services", "Facebook", "Meta",
    "Google", "Salesforce", "Oracle",
]


class RestrictedTermsJudge(Guardrail):
    """LLM-backed guardrail for restricted-term detection.

    Asks an OpenAI model (with structured output parsed into
    RestrictedTermsAnalysis) to find exact matches and variations
    (misspellings, abbreviations, l33t speak, case variants) of a
    caller-supplied list of restricted terms, and can optionally
    anonymize the offending spans.
    """
    llm_model: OpenAIModel = Field(default_factory=lambda: OpenAIModel())
    should_anonymize: bool = False

    def __init__(self, should_anonymize: bool = False, **kwargs):
        super().__init__(**kwargs)
        self.should_anonymize = should_anonymize
        # Pass the OpenAI client to instructor.patch() so responses can be
        # parsed directly into pydantic models.
        instructor.patch(client=self.llm_model._openai_client)

    def format_prompts(self, text: str, custom_terms: List[str]) -> tuple[str, str]:
        """Build the (user_prompt, system_prompt) pair for the LLM call."""
        user_prompt = f"""Analyze the following text for restricted terms and variations:

<text_to_analyze>
{text}
</text_to_analyze>

Look for these specific terms and their variations:
{', '.join(custom_terms)}

Analyze the text carefully for:
1. Exact matches
2. Common misspellings
3. Abbreviations
4. Intentional variations (l33t speak, special characters, etc.)
5. Case variations
"""

        system_prompt = """You are an expert system for detecting restricted terms, brand mentions, and inappropriate content.

Your task is to:
1. Identify exact matches of restricted terms
2. Detect variations including:
   - Misspellings (both accidental and intentional)
   - Abbreviations and acronyms
   - Case variations
   - L33t speak or special character substitutions
   - Partial matches within larger words

For each match, you must:
1. Identify the original restricted term
2. Note the actual text that matched
3. Classify the match type
4. Provide a confidence score
5. Explain why it's considered a match

Be thorough but avoid false positives. Focus on meaningful matches that indicate actual attempts to use restricted terms.

Return your analysis in the structured format specified by the RestrictedTermsAnalysis model."""

        return user_prompt, system_prompt

    @weave.op()
    def predict(self, text: str, custom_terms: List[str], **kwargs) -> RestrictedTermsAnalysis:
        """Run the LLM analysis of *text* and return the parsed result."""
        user_prompt, system_prompt = self.format_prompts(text, custom_terms)

        response = self.llm_model.predict(
            user_prompts=user_prompt,
            system_prompt=system_prompt,
            response_format=RestrictedTermsAnalysis,
            temperature=0.1,  # Lower temperature for more consistent analysis
            **kwargs,
        )

        return response.choices[0].message.parsed

    @weave.op()
    def guard(
        self,
        text: str,
        custom_terms: Optional[List[str]] = None,
        aggregate_redaction: bool = True,
        **kwargs,
    ) -> RestrictedTermsRecognitionResponse:
        """
        Guard against restricted terms and their variations.

        Args:
            text: Text to analyze
            custom_terms: List of restricted terms to check for; falls back
                to _DEFAULT_CUSTOM_TERMS when omitted.
            aggregate_redaction: When True every hit is replaced with
                "[redacted]"; otherwise the match type is used as the tag.

        Returns:
            RestrictedTermsRecognitionResponse containing safety assessment and detailed analysis
        """
        # Resolve the default here instead of in the signature to avoid the
        # shared-mutable-default-argument pitfall.
        if custom_terms is None:
            custom_terms = _DEFAULT_CUSTOM_TERMS

        analysis = self.predict(text, custom_terms, **kwargs)

        # Create a summary of findings
        if analysis.contains_restricted_terms:
            summary_parts = ["Restricted terms detected:"]
            for match in analysis.detected_matches:
                summary_parts.append(f"\n- {match.original_term}: {match.matched_text} ({match.match_type})")
            summary = "\n".join(summary_parts)
        else:
            summary = "No restricted terms detected."

        # Replace each matched span in the text. Skip empty matched_text:
        # str.replace("") would insert the tag between every character.
        anonymized_text = None
        if self.should_anonymize and analysis.contains_restricted_terms:
            anonymized_text = text
            for match in analysis.detected_matches:
                if not match.matched_text:
                    continue
                replacement = "[redacted]" if aggregate_redaction else f"[{match.match_type.upper()}]"
                anonymized_text = anonymized_text.replace(match.matched_text, replacement)

        # Group matched spans by their originating restricted term.
        detected_entities: Dict[str, List[str]] = {}
        for match in analysis.detected_matches:
            detected_entities.setdefault(match.original_term, []).append(match.matched_text)

        return RestrictedTermsRecognitionResponse(
            contains_entities=analysis.contains_restricted_terms,
            detected_entities=detected_entities,
            explanation=summary,
            anonymized_text=anonymized_text,
        )
guardrails_genie/guardrails/entity_recognition/pii_examples/pii_benchmark.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import random
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import weave
from datasets import load_dataset
from tqdm import tqdm
9
def load_ai4privacy_dataset(
    num_samples: int = 100,
    split: str = "validation",
    seed: Optional[int] = None,
) -> List[Dict]:
    """
    Load and prepare samples from the ai4privacy dataset.

    Args:
        num_samples: Number of samples to evaluate
        split: Dataset split to use ("train" or "validation")
        seed: Optional RNG seed; pass an int to make the sampled subset
            reproducible across benchmark runs. Default (None) keeps the
            previous behavior of a fresh random subset each call.

    Returns:
        List of prepared test cases
    """
    # Load the dataset
    dataset = load_dataset("ai4privacy/pii-masking-400k")

    # Get the specified split
    data_split = dataset[split]

    # Randomly sample entries if num_samples is less than total.
    # A dedicated Random instance avoids perturbing the global RNG state.
    if num_samples < len(data_split):
        rng = random.Random(seed)
        indices = rng.sample(range(len(data_split)), num_samples)
        samples = [data_split[i] for i in indices]
    else:
        samples = data_split

    # Convert to test case format
    test_cases = []
    for sample in samples:
        # Group the gold annotations from privacy_mask by entity label:
        # label -> list of masked surface values.
        entities: Dict[str, List[str]] = {}
        for entity in sample['privacy_mask']:
            entities.setdefault(entity['label'], []).append(entity['value'])

        test_cases.append({
            "description": f"AI4Privacy Sample (ID: {sample['uid']})",
            "input_text": sample['source_text'],
            "expected_entities": entities,
            "masked_text": sample['masked_text'],
            "language": sample['language'],
            "locale": sample['locale'],
        })

    return test_cases
56
+
57
@weave.op()
def evaluate_model(guardrail, test_cases: List[Dict]) -> Tuple[Dict, List[Dict]]:
    """
    Evaluate a model on the test cases.

    For every test case the guardrail's detections are compared, per entity
    type, against the expected entities as *sets* of surface strings.
    Precision/recall/F1 are computed per sample, and micro-averaged over
    all samples for the final per-entity-type metrics. A sample counts as
    "passed" only if every entity type it involves scores F1 == 1.0.

    Args:
        guardrail: Entity recognition guardrail to evaluate
        test_cases: List of test cases

    Returns:
        Tuple of (metrics dict, detailed results list)
    """
    metrics = {
        "total": len(test_cases),
        "passed": 0,
        "failed": 0,
        "entity_metrics": {}  # Will store precision/recall per entity type
    }

    detailed_results = []

    for test_case in tqdm(test_cases, desc="Evaluating samples"):
        # Run detection.
        # NOTE(review): assumes guardrail.guard returns an object exposing
        # .detected_entities (dict[str, list[str]]) and .anonymized_text.
        result = guardrail.guard(test_case['input_text'])
        detected = result.detected_entities
        expected = test_case['expected_entities']

        # Track entity-level metrics over the union of detected/expected types
        all_entity_types = set(list(detected.keys()) + list(expected.keys()))
        entity_results = {}

        for entity_type in all_entity_types:
            detected_set = set(detected.get(entity_type, []))
            expected_set = set(expected.get(entity_type, []))

            # Calculate metrics (set-based exact string matching)
            true_positives = len(detected_set & expected_set)
            false_positives = len(detected_set - expected_set)
            false_negatives = len(expected_set - detected_set)

            # Guard each ratio against a zero denominator.
            precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
            recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
            f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

            entity_results[entity_type] = {
                "precision": precision,
                "recall": recall,
                "f1": f1,
                "true_positives": true_positives,
                "false_positives": false_positives,
                "false_negatives": false_negatives
            }

            # Aggregate raw counts across samples (micro-average inputs).
            if entity_type not in metrics["entity_metrics"]:
                metrics["entity_metrics"][entity_type] = {
                    "total_true_positives": 0,
                    "total_false_positives": 0,
                    "total_false_negatives": 0
                }
            metrics["entity_metrics"][entity_type]["total_true_positives"] += true_positives
            metrics["entity_metrics"][entity_type]["total_false_positives"] += false_positives
            metrics["entity_metrics"][entity_type]["total_false_negatives"] += false_negatives

        # Store detailed per-sample result for later inspection/saving.
        detailed_result = {
            "id": test_case.get("description", ""),
            "language": test_case.get("language", ""),
            "locale": test_case.get("locale", ""),
            "input_text": test_case["input_text"],
            "expected_entities": expected,
            "detected_entities": detected,
            "entity_metrics": entity_results,
            "anonymized_text": result.anonymized_text if result.anonymized_text else None
        }
        detailed_results.append(detailed_result)

        # Update pass/fail counts: pass only on a perfect sample.
        if all(entity_results[et]["f1"] == 1.0 for et in entity_results):
            metrics["passed"] += 1
        else:
            metrics["failed"] += 1

    # Calculate final entity metrics (micro-averaged over all samples).
    for entity_type, counts in metrics["entity_metrics"].items():
        tp = counts["total_true_positives"]
        fp = counts["total_false_positives"]
        fn = counts["total_false_negatives"]

        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

        metrics["entity_metrics"][entity_type].update({
            "precision": precision,
            "recall": recall,
            "f1": f1
        })

    return metrics, detailed_results
157
+
158
def save_results(metrics: Dict, detailed_results: List[Dict], model_name: str, output_dir: str = "evaluation_results"):
    """Save evaluation results to files.

    Writes two JSON files into *output_dir*:
      <model_name>_metrics.json           -- aggregate metrics summary
      <model_name>_detailed_results.json  -- per-sample detailed results

    Args:
        metrics: Aggregate metrics as produced by evaluate_model().
        detailed_results: Per-sample result dicts from evaluate_model().
        model_name: Prefix used for the output file names.
        output_dir: Target directory; created (including parents) if missing.
    """
    output_dir = Path(output_dir)
    # parents=True so a nested path such as "results/run1" also works;
    # the original mkdir(exist_ok=True) raised FileNotFoundError for those.
    output_dir.mkdir(parents=True, exist_ok=True)

    # Explicit UTF-8 and ensure_ascii=False keep the multilingual dataset
    # text readable instead of \uXXXX-escaped.
    # Save metrics summary
    with open(output_dir / f"{model_name}_metrics.json", "w", encoding="utf-8") as f:
        json.dump(metrics, f, indent=2, ensure_ascii=False)

    # Save detailed results
    with open(output_dir / f"{model_name}_detailed_results.json", "w", encoding="utf-8") as f:
        json.dump(detailed_results, f, indent=2, ensure_ascii=False)
170
+
171
def print_metrics_summary(metrics: Dict):
    """Print a human-readable summary of the evaluation metrics to stdout."""
    rule = "=" * 80
    divider = "-" * 80

    # Overall pass/fail counts.
    print("\nEvaluation Summary")
    print(rule)
    print(f"Total Samples: {metrics['total']}")
    print(f"Passed: {metrics['passed']}")
    print(f"Failed: {metrics['failed']}")
    print(f"Success Rate: {(metrics['passed']/metrics['total'])*100:.1f}%")

    # Per-entity-type table.
    print("\nEntity-level Metrics:")
    print(divider)
    print(f"{'Entity Type':<20} {'Precision':>10} {'Recall':>10} {'F1':>10}")
    print(divider)
    for name, scores in metrics["entity_metrics"].items():
        print(f"{name:<20} {scores['precision']:>10.2f} {scores['recall']:>10.2f} {scores['f1']:>10.2f}")
186
+
187
def main():
    """Run the PII benchmark end to end.

    Loads evaluation samples from the ai4privacy dataset, runs each
    configured guardrail over them, then prints and saves the results.
    """
    # Import the guardrails here rather than under the __main__ guard:
    # the original placed these imports inside `if __name__ == "__main__"`,
    # so calling main() from another module (as the package ReadMe suggests)
    # raised NameError. Keeping them function-local also defers the heavy
    # model dependencies until the benchmark is actually run.
    from guardrails_genie.guardrails.entity_recognition.presidio_entity_recognition_guardrail import PresidioEntityRecognitionGuardrail
    from guardrails_genie.guardrails.entity_recognition.regex_entity_recognition_guardrail import RegexEntityRecognitionGuardrail
    from guardrails_genie.guardrails.entity_recognition.transformers_entity_recognition_guardrail import TransformersEntityRecognitionGuardrail

    weave.init("guardrails-genie-pii-evaluation")

    # Load test cases
    test_cases = load_ai4privacy_dataset(num_samples=100)

    # Initialize models to evaluate
    models = {
        "regex": RegexEntityRecognitionGuardrail(should_anonymize=True),
        "presidio": PresidioEntityRecognitionGuardrail(should_anonymize=True),
        "transformers": TransformersEntityRecognitionGuardrail(should_anonymize=True)
    }

    # Evaluate each model
    for model_name, guardrail in models.items():
        print(f"\nEvaluating {model_name} model...")
        metrics, detailed_results = evaluate_model(guardrail, test_cases)

        # Print and save results
        print_metrics_summary(metrics)
        save_results(metrics, detailed_results, model_name)

if __name__ == "__main__":
    main()
guardrails_genie/guardrails/entity_recognition/pii_examples/pii_test_examples.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Collection of PII test examples with expected outcomes for entity recognition testing.
3
+ Each example includes the input text and expected entities to be detected.
4
+ """
5
+
6
+ PII_TEST_EXAMPLES = [
7
+ {
8
+ "description": "Business Context - Employee Record",
9
+ "input_text": """
10
+ Please update our records for employee John Smith:
11
+ Email: john.smith@company.com
12
+ Phone: 123-456-7890
13
+ SSN: 123-45-6789
14
+ Emergency Contact: Mary Johnson (Tel: 098-765-4321)
15
+ """,
16
+ "expected_entities": {
17
+ "GIVENNAME": ["John", "Mary"],
18
+ "SURNAME": ["Smith", "Johnson"],
19
+ "EMAIL": ["john.smith@company.com"],
20
+ "PHONE_NUMBER": ["123-456-7890", "098-765-4321"],
21
+ "SOCIALNUM": ["123-45-6789"]
22
+ }
23
+ },
24
+ {
25
+ "description": "Meeting Notes with Attendees",
26
+ "input_text": """
27
+ Meeting Notes - Project Alpha
28
+ Date: 2024-03-15
29
+ Attendees:
30
+ - Sarah Williams (sarah.w@company.com)
31
+ - Robert Brown (bobby@email.com)
32
+ - Tom Wilson (555-0123-4567)
33
+
34
+ Action Items:
35
+ 1. Sarah to review documentation
36
+ 2. Contact Bob at his alternate number: 777-888-9999
37
+ """,
38
+ "expected_entities": {
39
+ "GIVENNAME": ["Sarah", "Robert", "Tom", "Bob"],
40
+ "SURNAME": ["Williams", "Brown", "Wilson"],
41
+ "EMAIL": ["sarah.w@company.com", "bobby@email.com"],
42
+ "PHONE_NUMBER": ["555-0123-4567", "777-888-9999"]
43
+ }
44
+ },
45
+ {
46
+ "description": "Medical Record",
47
+ "input_text": """
48
+ Patient: Emma Thompson
49
+ DOB: 05/15/1980
50
+ Medical Record #: MR-12345
51
+ Primary Care: Dr. James Wilson
52
+ Contact: emma.t@email.com
53
+ Insurance ID: INS-987654321
54
+ Emergency Contact: Michael Thompson (555-123-4567)
55
+ """,
56
+ "expected_entities": {
57
+ "GIVENNAME": ["Emma", "James", "Michael"],
58
+ "SURNAME": ["Thompson", "Wilson", "Thompson"],
59
+ "EMAIL": ["emma.t@email.com"],
60
+ "PHONE_NUMBER": ["555-123-4567"]
61
+ }
62
+ },
63
+ {
64
+ "description": "No PII Content",
65
+ "input_text": """
66
+ Project Status Update:
67
+ - All deliverables are on track
68
+ - Budget is within limits
69
+ - Next review scheduled for next week
70
+ """,
71
+ "expected_entities": {}
72
+ },
73
+ {
74
+ "description": "Mixed Format Phone Numbers",
75
+ "input_text": """
76
+ Contact Directory:
77
+ Main Office: (555) 123-4567
78
+ Support: 555.987.6543
79
+ International: +1-555-321-7890
80
+ Emergency: 555 444 3333
81
+ """,
82
+ "expected_entities": {
83
+ "PHONE_NUMBER": [
84
+ "(555) 123-4567",
85
+ "555.987.6543",
86
+ "+1-555-321-7890",
87
+ "555 444 3333"
88
+ ]
89
+ }
90
+ }
91
+ ]
92
+
93
# Additional examples can be added to test specific edge cases or formats
# (same schema as PII_TEST_EXAMPLES).
EDGE_CASE_EXAMPLES = [
    {
        "description": "Mixed Case and Special Characters",
        "input_text": """
        JoHn.DoE@company.com
        JANE_SMITH@email.com
        bob.jones123@domain.co.uk
        """,
        "expected_entities": {
            "EMAIL": [
                "JoHn.DoE@company.com",
                "JANE_SMITH@email.com",
                "bob.jones123@domain.co.uk"
            ],
            # NOTE(review): names are expected in normalized casing even though
            # the input is mixed-case — confirm detectors are meant to
            # normalize, otherwise these cases can never pass.
            "GIVENNAME": ["John", "Jane", "Bob"],
            "SURNAME": ["Doe", "Smith", "Jones"]
        }
    }
]
113
+
114
def validate_entities(detected: dict, expected: dict) -> bool:
    """Return True iff *detected* matches *expected* exactly.

    Both arguments map entity type -> list of values. The two must contain
    the same entity types and, per type, the same set of values; ordering
    and duplicates are ignored.
    """
    expected_types = set(expected)
    if set(detected) != expected_types:
        return False
    for entity_type in expected_types:
        if set(detected[entity_type]) != set(expected[entity_type]):
            return False
    return True
119
+
120
def run_test_case(guardrail, test_case, test_type="Main"):
    """Run one test case against *guardrail* and print a pass/fail report.

    Args:
        guardrail: Object exposing ``guard(text)`` returning a response with
            ``detected_entities``, ``contains_entities`` and ``anonymized_text``.
        test_case: Dict with ``description``, ``input_text`` and
            ``expected_entities`` keys.
        test_type: Label used in the report header ("Main" or "Edge").

    Returns:
        True when detected entities exactly match the expectation.
    """
    print(f"\n{test_type} Test Case: {test_case['description']}")
    print("-" * 50)

    response = guardrail.guard(test_case['input_text'])
    expected_entities = test_case['expected_entities']

    # Exact set comparison of expected vs. detected entities.
    passed = validate_entities(response.detected_entities, expected_entities)

    print(f"Test Status: {'✓ PASS' if passed else '✗ FAIL'}")
    print(f"Contains PII: {response.contains_entities}")

    if not passed:
        # On failure, show a per-type diff of expected vs. detected values.
        print("\nEntity Comparison:")
        entity_types = set(response.detected_entities) | set(expected_entities)
        for entity_type in entity_types:
            found = set(response.detected_entities.get(entity_type, []))
            wanted = set(expected_entities.get(entity_type, []))
            print(f"\nEntity Type: {entity_type}")
            print(f"  Expected: {sorted(wanted)}")
            print(f"  Detected: {sorted(found)}")
            if found != wanted:
                print(f"  Missing: {sorted(wanted - found)}")
                print(f"  Extra: {sorted(found - wanted)}")

    if response.anonymized_text:
        print(f"\nAnonymized Text:\n{response.anonymized_text}")

    return passed
guardrails_genie/guardrails/entity_recognition/pii_examples/run_presidio_model.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from guardrails_genie.guardrails.entity_recognition.presidio_entity_recognition_guardrail import PresidioEntityRecognitionGuardrail
2
+ from guardrails_genie.guardrails.entity_recognition.pii_examples.pii_test_examples import PII_TEST_EXAMPLES, EDGE_CASE_EXAMPLES, run_test_case, validate_entities
3
+ import weave
4
+
5
def test_pii_detection():
    """Run the predefined PII test suites against the Presidio guardrail
    and print a per-case report plus an overall summary."""
    weave.init("guardrails-genie-pii-presidio-model")

    # Guardrail under test: default entity set, anonymization enabled so the
    # redacted text is shown for each case.
    pii_guardrail = PresidioEntityRecognitionGuardrail(
        should_anonymize=True,
        show_available_entities=True
    )

    # (header printed, cases, label passed to run_test_case)
    suites = [
        ("Main PII Tests", PII_TEST_EXAMPLES, "Main"),
        ("Edge Cases", EDGE_CASE_EXAMPLES, "Edge"),
    ]

    total_tests = sum(len(cases) for _, cases, _ in suites)
    passed_tests = 0

    for title, cases, label in suites:
        print(f"\nRunning {title}")
        print("=" * 80)
        for test_case in cases:
            if run_test_case(pii_guardrail, test_case, label):
                passed_tests += 1

    # Aggregate summary.
    print("\nTest Summary")
    print("=" * 80)
    print(f"Total Tests: {total_tests}")
    print(f"Passed: {passed_tests}")
    print(f"Failed: {total_tests - passed_tests}")
    print(f"Success Rate: {(passed_tests/total_tests)*100:.1f}%")
40
+
41
+ if __name__ == "__main__":
42
+ test_pii_detection()
guardrails_genie/guardrails/entity_recognition/pii_examples/run_regex_model.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from guardrails_genie.guardrails.entity_recognition.regex_entity_recognition_guardrail import RegexEntityRecognitionGuardrail
2
+ from guardrails_genie.guardrails.entity_recognition.pii_examples.pii_test_examples import PII_TEST_EXAMPLES, EDGE_CASE_EXAMPLES, run_test_case, validate_entities
3
+ import weave
4
+
5
def test_pii_detection():
    """Run the predefined PII test suites against the regex guardrail
    and print a per-case report plus an overall summary."""
    weave.init("guardrails-genie-pii-regex-model")

    # Guardrail under test: default patterns, anonymization enabled so the
    # redacted text is shown for each case.
    pii_guardrail = RegexEntityRecognitionGuardrail(
        should_anonymize=True,
        show_available_entities=True
    )

    # (header printed, cases, label passed to run_test_case)
    suites = [
        ("Main PII Tests", PII_TEST_EXAMPLES, "Main"),
        ("Edge Cases", EDGE_CASE_EXAMPLES, "Edge"),
    ]

    total_tests = sum(len(cases) for _, cases, _ in suites)
    passed_tests = 0

    for title, cases, label in suites:
        print(f"\nRunning {title}")
        print("=" * 80)
        for test_case in cases:
            if run_test_case(pii_guardrail, test_case, label):
                passed_tests += 1

    # Aggregate summary.
    print("\nTest Summary")
    print("=" * 80)
    print(f"Total Tests: {total_tests}")
    print(f"Passed: {passed_tests}")
    print(f"Failed: {total_tests - passed_tests}")
    print(f"Success Rate: {(passed_tests/total_tests)*100:.1f}%")
40
+
41
+ if __name__ == "__main__":
42
+ test_pii_detection()
guardrails_genie/guardrails/entity_recognition/pii_examples/run_transformers.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from guardrails_genie.guardrails.entity_recognition.transformers_entity_recognition_guardrail import TransformersEntityRecognitionGuardrail
2
+ from guardrails_genie.guardrails.entity_recognition.pii_examples.pii_test_examples import PII_TEST_EXAMPLES, EDGE_CASE_EXAMPLES, run_test_case, validate_entities
3
+ import weave
4
+
5
def test_pii_detection():
    """Run the predefined PII test suites against the transformers guardrail
    and print a per-case report plus an overall summary."""
    weave.init("guardrails-genie-pii-transformers-pipeline-model")

    # Guardrail under test: restricted to the entity types the test examples
    # use, anonymization enabled so the redacted text is shown for each case.
    pii_guardrail = TransformersEntityRecognitionGuardrail(
        selected_entities=["GIVENNAME", "SURNAME", "EMAIL", "TELEPHONENUM", "SOCIALNUM"],
        should_anonymize=True,
        show_available_entities=True
    )

    # (header printed, cases, label passed to run_test_case)
    suites = [
        ("Main PII Tests", PII_TEST_EXAMPLES, "Main"),
        ("Edge Cases", EDGE_CASE_EXAMPLES, "Edge"),
    ]

    total_tests = sum(len(cases) for _, cases, _ in suites)
    passed_tests = 0

    for title, cases, label in suites:
        print(f"\nRunning {title}")
        print("=" * 80)
        for test_case in cases:
            if run_test_case(pii_guardrail, test_case, label):
                passed_tests += 1

    # Aggregate summary.
    print("\nTest Summary")
    print("=" * 80)
    print(f"Total Tests: {total_tests}")
    print(f"Passed: {passed_tests}")
    print(f"Failed: {total_tests - passed_tests}")
    print(f"Success Rate: {(passed_tests/total_tests)*100:.1f}%")
41
+
42
+ if __name__ == "__main__":
43
+ test_pii_detection()
guardrails_genie/guardrails/entity_recognition/presidio_entity_recognition_guardrail.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict, Optional, ClassVar, Any
2
+ import weave
3
+ from pydantic import BaseModel
4
+
5
+ from presidio_analyzer import AnalyzerEngine, RecognizerRegistry, Pattern, PatternRecognizer
6
+ from presidio_anonymizer import AnonymizerEngine
7
+
8
+ from ..base import Guardrail
9
+
10
class PresidioEntityRecognitionResponse(BaseModel):
    """Detailed result of a Presidio entity scan (see
    PresidioEntityRecognitionGuardrail.guard)."""
    # Whether any selected entity type was found in the text.
    contains_entities: bool
    # Mapping of entity type -> list of matched text spans.
    detected_entities: Dict[str, List[str]]
    # Human-readable summary of what was found and what was checked.
    explanation: str
    # Prompt with entities masked; None when anonymization is disabled.
    anonymized_text: Optional[str] = None

    @property
    def safe(self) -> bool:
        """True when no entities were detected."""
        return not self.contains_entities
19
+
20
class PresidioEntityRecognitionSimpleResponse(BaseModel):
    """Like PresidioEntityRecognitionResponse, but without the per-type
    entity breakdown."""
    # Whether any selected entity type was found in the text.
    contains_entities: bool
    # Human-readable summary of what was found and what was checked.
    explanation: str
    # Prompt with entities masked; None when anonymization is disabled.
    anonymized_text: Optional[str] = None

    @property
    def safe(self) -> bool:
        """True when no entities were detected."""
        return not self.contains_entities
28
+
29
#TODO: Add support for transformers workflow and not just Spacy
class PresidioEntityRecognitionGuardrail(Guardrail):
    """Entity-recognition guardrail backed by Microsoft Presidio.

    Scans prompts with Presidio's ``AnalyzerEngine`` for the configured
    entity types and, when ``should_anonymize`` is set, also returns an
    anonymized copy of the prompt produced by the ``AnonymizerEngine``.
    """

    @staticmethod
    def get_available_entities() -> List[str]:
        """Return entity types reported by Presidio's registered recognizers.

        NOTE(review): only the first element of each recognizer's
        ``supported_entities`` is collected, so recognizers supporting
        multiple entity types may be under-reported — confirm this is
        intentional.
        """
        registry = RecognizerRegistry()
        analyzer = AnalyzerEngine(registry=registry)
        return [recognizer.supported_entities[0]
                for recognizer in analyzer.registry.recognizers]

    # Fields declared for the pydantic-based Guardrail model; values are
    # supplied via super().__init__ below.
    analyzer: AnalyzerEngine
    anonymizer: AnonymizerEngine
    selected_entities: List[str]
    should_anonymize: bool
    language: str

    def __init__(
        self,
        selected_entities: Optional[List[str]] = None,
        should_anonymize: bool = False,
        language: str = "en",
        deny_lists: Optional[Dict[str, List[str]]] = None,
        regex_patterns: Optional[Dict[str, List[Dict[str, str]]]] = None,
        custom_recognizers: Optional[List[Any]] = None,
        show_available_entities: bool = False
    ):
        """Build the analyzer/anonymizer pair and validate selected entities.

        Args:
            selected_entities: Entity types to detect; defaults to a common
                PII subset. Unknown types are dropped with a printed warning.
            should_anonymize: When True, guard() also returns anonymized text.
            language: Language code passed to the Presidio analyzer.
            deny_lists: Optional map of entity type -> exact tokens to flag.
            regex_patterns: Optional map of entity type -> list of pattern
                dicts with "regex" (required), "name" and "score" (optional).
            custom_recognizers: Pre-built Presidio recognizers to register.
            show_available_entities: Print supported entity types on init.
        """
        # If show_available_entities is True, print available entities
        if show_available_entities:
            available_entities = self.get_available_entities()
            print("\nAvailable entities:")
            print("=" * 25)
            for entity in available_entities:
                print(f"- {entity}")
            print("=" * 25 + "\n")

        # Initialize default values
        if selected_entities is None:
            selected_entities = [
                "CREDIT_CARD", "US_SSN", "EMAIL_ADDRESS", "PHONE_NUMBER",
                "IP_ADDRESS", "URL", "DATE_TIME"
            ]

        # Get available entities dynamically
        available_entities = self.get_available_entities()

        # Filter out invalid entities and warn user
        invalid_entities = [e for e in selected_entities if e not in available_entities]
        valid_entities = [e for e in selected_entities if e in available_entities]

        if invalid_entities:
            print(f"\nWarning: The following entities are not available and will be ignored: {invalid_entities}")
            print(f"Continuing with valid entities: {valid_entities}")
            selected_entities = valid_entities

        # Initialize analyzer with default recognizers
        analyzer = AnalyzerEngine()

        # Add custom recognizers if provided
        if custom_recognizers:
            for recognizer in custom_recognizers:
                analyzer.registry.add_recognizer(recognizer)

        # Add deny list recognizers if provided
        if deny_lists:
            for entity_type, tokens in deny_lists.items():
                deny_list_recognizer = PatternRecognizer(
                    supported_entity=entity_type,
                    deny_list=tokens
                )
                analyzer.registry.add_recognizer(deny_list_recognizer)

        # Add regex pattern recognizers if provided
        if regex_patterns:
            for entity_type, patterns in regex_patterns.items():
                presidio_patterns = [
                    Pattern(
                        name=pattern.get("name", f"pattern_{i}"),
                        regex=pattern["regex"],
                        score=pattern.get("score", 0.5)
                    ) for i, pattern in enumerate(patterns)
                ]
                regex_recognizer = PatternRecognizer(
                    supported_entity=entity_type,
                    patterns=presidio_patterns
                )
                analyzer.registry.add_recognizer(regex_recognizer)

        # Initialize Presidio engines
        anonymizer = AnonymizerEngine()

        # Call parent class constructor with all fields
        super().__init__(
            analyzer=analyzer,
            anonymizer=anonymizer,
            selected_entities=selected_entities,
            should_anonymize=should_anonymize,
            language=language
        )

    @weave.op()
    def guard(self, prompt: str, return_detected_types: bool = True, **kwargs) -> PresidioEntityRecognitionResponse | PresidioEntityRecognitionSimpleResponse:
        """
        Check if the input prompt contains any entities using Presidio.

        Args:
            prompt: The text to analyze
            return_detected_types: If True, returns detailed entity type information
        """
        # Analyze text for entities
        analyzer_results = self.analyzer.analyze(
            text=prompt,
            entities=self.selected_entities,
            language=self.language
        )

        # Group results by entity type, slicing the matched spans out of the
        # prompt with the analyzer's start/end offsets.
        detected_entities = {}
        for result in analyzer_results:
            entity_type = result.entity_type
            text_slice = prompt[result.start:result.end]
            if entity_type not in detected_entities:
                detected_entities[entity_type] = []
            detected_entities[entity_type].append(text_slice)

        # Create explanation
        explanation_parts = []
        if detected_entities:
            explanation_parts.append("Found the following entities in the text:")
            for entity_type, instances in detected_entities.items():
                explanation_parts.append(f"- {entity_type}: {len(instances)} instance(s)")
        else:
            explanation_parts.append("No entities detected in the text.")

        # Add information about what was checked
        explanation_parts.append("\nChecked for these entity types:")
        for entity in self.selected_entities:
            explanation_parts.append(f"- {entity}")

        # Anonymize if requested
        anonymized_text = None
        if self.should_anonymize and detected_entities:
            anonymized_result = self.anonymizer.anonymize(
                text=prompt,
                analyzer_results=analyzer_results
            )
            anonymized_text = anonymized_result.text

        if return_detected_types:
            return PresidioEntityRecognitionResponse(
                contains_entities=bool(detected_entities),
                detected_entities=detected_entities,
                explanation="\n".join(explanation_parts),
                anonymized_text=anonymized_text
            )
        else:
            return PresidioEntityRecognitionSimpleResponse(
                contains_entities=bool(detected_entities),
                explanation="\n".join(explanation_parts),
                anonymized_text=anonymized_text
            )

    @weave.op()
    def predict(self, prompt: str, return_detected_types: bool = True, **kwargs) -> PresidioEntityRecognitionResponse | PresidioEntityRecognitionSimpleResponse:
        """Delegate to guard(); separate op so it can be traced/called as a model."""
        return self.guard(prompt, return_detected_types=return_detected_types, **kwargs)
guardrails_genie/guardrails/entity_recognition/regex_entity_recognition_guardrail.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Optional, ClassVar
2
+
3
+ import weave
4
+ from pydantic import BaseModel
5
+
6
+ from ...regex_model import RegexModel
7
+ from ..base import Guardrail
8
+ import re
9
+
10
+
11
class RegexEntityRecognitionResponse(BaseModel):
    """Detailed result of a regex entity scan (see
    RegexEntityRecognitionGuardrail.guard)."""
    # Whether any configured pattern matched the text.
    contains_entities: bool
    # Mapping of pattern name -> list of matched substrings.
    detected_entities: Dict[str, list[str]]
    # Human-readable summary of what matched and what was checked.
    explanation: str
    # Prompt with matches replaced; None when anonymization is disabled.
    anonymized_text: Optional[str] = None

    @property
    def safe(self) -> bool:
        """True when no pattern matched."""
        return not self.contains_entities
20
+
21
+
22
class RegexEntityRecognitionSimpleResponse(BaseModel):
    """Like RegexEntityRecognitionResponse, but without the per-pattern
    match breakdown."""
    # Whether any configured pattern matched the text.
    contains_entities: bool
    # Human-readable summary of what matched and what was checked.
    explanation: str
    # Prompt with matches replaced; None when anonymization is disabled.
    anonymized_text: Optional[str] = None

    @property
    def safe(self) -> bool:
        """True when no pattern matched."""
        return not self.contains_entities
30
+
31
+
32
class RegexEntityRecognitionGuardrail(Guardrail):
    """Entity-recognition guardrail driven by named regular expressions.

    Wraps a RegexModel built from DEFAULT_PATTERNS (optionally extended via
    a ``patterns`` kwarg) and reports which named patterns matched the
    prompt; can also replace matches in the returned anonymized text.
    """

    regex_model: RegexModel
    patterns: Dict[str, str] = {}
    should_anonymize: bool = False

    # Built-in PII patterns used when use_defaults=True. Keys double as the
    # entity-type names reported in responses.
    DEFAULT_PATTERNS: ClassVar[Dict[str, str]] = {
        "email": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
        "phone_number": r"\b(?:\+?1[-.]?)?\(?(?:[0-9]{3})\)?[-.]?(?:[0-9]{3})[-.]?(?:[0-9]{4})\b",
        "ssn": r"\b\d{3}[-]?\d{2}[-]?\d{4}\b",
        "credit_card": r"\b\d{4}[-.]?\d{4}[-.]?\d{4}[-.]?\d{4}\b",
        "ip_address": r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b",
        "date_of_birth": r"\b\d{2}[-/]\d{2}[-/]\d{4}\b",
        "passport": r"\b[A-Z]{1,2}[0-9]{6,9}\b",
        "drivers_license": r"\b[A-Z]\d{7}\b",
        "bank_account": r"\b\d{8,17}\b",
        "zip_code": r"\b\d{5}(?:[-]\d{4})?\b"
    }

    def __init__(self, use_defaults: bool = True, should_anonymize: bool = False, **kwargs):
        """Assemble the pattern set and initialize the underlying RegexModel.

        Args:
            use_defaults: Start from DEFAULT_PATTERNS when True.
            should_anonymize: Replace matches in the returned text when True.
            **kwargs: May carry ``patterns``, a dict of extra/overriding
                {name: regex} entries merged over the defaults.
        """
        patterns = {}
        if use_defaults:
            patterns = self.DEFAULT_PATTERNS.copy()
        if kwargs.get("patterns"):
            patterns.update(kwargs["patterns"])

        # Create the RegexModel instance
        regex_model = RegexModel(patterns=patterns)

        # Initialize the base class with both the regex_model and patterns
        super().__init__(
            regex_model=regex_model,
            patterns=patterns,
            should_anonymize=should_anonymize
        )

    def text_to_pattern(self, text: str) -> str:
        """
        Convert input text into a regex pattern that matches the exact text.
        """
        # Escape special regex characters in the text
        escaped_text = re.escape(text)
        # Match the exact text as a whole word. NOTE(review): no case-folding
        # flag is applied here, so matching is case-sensitive.
        return rf"\b{escaped_text}\b"

    @weave.op()
    def guard(self, prompt: str, custom_terms: Optional[list[str]] = None, return_detected_types: bool = True, aggregate_redaction: bool = True, **kwargs) -> RegexEntityRecognitionResponse | RegexEntityRecognitionSimpleResponse:
        """
        Check if the input prompt contains any entities based on the regex patterns.

        Args:
            prompt: Input text to check for entities
            custom_terms: List of custom terms to be converted into regex patterns. If provided,
                only these terms will be checked, ignoring default patterns.
            return_detected_types: If True, returns detailed entity type information
            aggregate_redaction: If True, matches are replaced with the generic
                "[redacted]" marker instead of "[ENTITY_TYPE]"

        Returns:
            RegexEntityRecognitionResponse or RegexEntityRecognitionSimpleResponse containing detection results
        """
        if custom_terms:
            # Create a temporary RegexModel with only the custom patterns
            temp_patterns = {term: self.text_to_pattern(term) for term in custom_terms}
            temp_model = RegexModel(patterns=temp_patterns)
            result = temp_model.check(prompt)
        else:
            # Use the original regex_model if no custom terms provided
            result = self.regex_model.check(prompt)

        # Create detailed explanation
        explanation_parts = []
        if result.matched_patterns:
            explanation_parts.append("Found the following entities in the text:")
            for entity_type, matches in result.matched_patterns.items():
                explanation_parts.append(f"- {entity_type}: {len(matches)} instance(s)")
        else:
            explanation_parts.append("No entities detected in the text.")

        if result.failed_patterns:
            explanation_parts.append("\nChecked but did not find these entity types:")
            for pattern in result.failed_patterns:
                explanation_parts.append(f"- {pattern}")

        # Updated anonymization logic. NOTE(review): str.replace substitutes
        # every occurrence of the matched string in the whole prompt, not just
        # the span the regex found — confirm this is acceptable.
        anonymized_text = None
        if getattr(self, 'should_anonymize', False) and result.matched_patterns:
            anonymized_text = prompt
            for entity_type, matches in result.matched_patterns.items():
                for match in matches:
                    replacement = "[redacted]" if aggregate_redaction else f"[{entity_type.upper()}]"
                    anonymized_text = anonymized_text.replace(match, replacement)

        if return_detected_types:
            return RegexEntityRecognitionResponse(
                contains_entities=not result.passed,
                detected_entities=result.matched_patterns,
                explanation="\n".join(explanation_parts),
                anonymized_text=anonymized_text
            )
        else:
            return RegexEntityRecognitionSimpleResponse(
                contains_entities=not result.passed,
                explanation="\n".join(explanation_parts),
                anonymized_text=anonymized_text
            )

    @weave.op()
    def predict(self, prompt: str, return_detected_types: bool = True, aggregate_redaction: bool = True, **kwargs) -> RegexEntityRecognitionResponse | RegexEntityRecognitionSimpleResponse:
        """Delegate to guard(); separate op so it can be traced/called as a model."""
        return self.guard(prompt, return_detected_types=return_detected_types, aggregate_redaction=aggregate_redaction, **kwargs)
guardrails_genie/guardrails/entity_recognition/transformers_entity_recognition_guardrail.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict, Optional, ClassVar
2
+ from transformers import pipeline, AutoConfig
3
+ import json
4
+ from pydantic import BaseModel
5
+ from ..base import Guardrail
6
+ import weave
7
+
8
class TransformersEntityRecognitionResponse(BaseModel):
    """Detailed result of a transformer-pipeline entity scan (see
    TransformersEntityRecognitionGuardrail.guard)."""
    # Whether any selected entity type was found in the text.
    contains_entities: bool
    # Mapping of entity type -> list of detected word spans.
    detected_entities: Dict[str, List[str]]
    # Human-readable summary of what was found and what was checked.
    explanation: str
    # Prompt with entities redacted; None when anonymization is disabled.
    anonymized_text: Optional[str] = None

    @property
    def safe(self) -> bool:
        """True when no entities were detected."""
        return not self.contains_entities
17
+
18
class TransformersEntityRecognitionSimpleResponse(BaseModel):
    """Like TransformersEntityRecognitionResponse, but without the per-type
    entity breakdown."""
    # Whether any selected entity type was found in the text.
    contains_entities: bool
    # Human-readable summary of what was found and what was checked.
    explanation: str
    # Prompt with entities redacted; None when anonymization is disabled.
    anonymized_text: Optional[str] = None

    @property
    def safe(self) -> bool:
        """True when no entities were detected."""
        return not self.contains_entities
26
+
27
class TransformersEntityRecognitionGuardrail(Guardrail):
    """Generic guardrail for detecting entities using any token classification model."""

    # Private attribute (underscore-prefixed, so not a pydantic field);
    # holds the HF token-classification pipeline once constructed.
    _pipeline: Optional[object] = None
    selected_entities: List[str]
    should_anonymize: bool
    available_entities: List[str]

    def __init__(
        self,
        model_name: str = "iiiorg/piiranha-v1-detect-personal-information",
        selected_entities: Optional[List[str]] = None,
        should_anonymize: bool = False,
        show_available_entities: bool = True,
    ):
        """Configure the guardrail around a HF token-classification model.

        Args:
            model_name: Hub id of the token-classification model to load.
            selected_entities: Entity types to report; defaults to every type
                the model supports. Unknown types are dropped with a warning.
            should_anonymize: When True, guard() also returns redacted text.
            show_available_entities: Print the model's entity types on init.
        """
        # Load model config and extract available entities
        config = AutoConfig.from_pretrained(model_name)
        entities = self._extract_entities_from_config(config)

        if show_available_entities:
            self._print_available_entities(entities)

        # Initialize default values if needed
        if selected_entities is None:
            selected_entities = entities  # Use all available entities by default

        # Filter out invalid entities and warn user
        invalid_entities = [e for e in selected_entities if e not in entities]
        valid_entities = [e for e in selected_entities if e in entities]

        if invalid_entities:
            print(f"\nWarning: The following entities are not available and will be ignored: {invalid_entities}")
            print(f"Continuing with valid entities: {valid_entities}")
            selected_entities = valid_entities

        # Call parent class constructor
        super().__init__(
            selected_entities=selected_entities,
            should_anonymize=should_anonymize,
            available_entities=entities
        )

        # Initialize pipeline
        self._pipeline = pipeline(
            task="token-classification",
            model=model_name,
            aggregation_strategy="simple"  # Merge same entities
        )

    def _extract_entities_from_config(self, config) -> List[str]:
        """Extract unique entity types from the model config."""
        # Get id2label mapping from config
        id2label = config.id2label

        # Extract unique entity types (removing B- and I- prefixes)
        entities = set()
        for label in id2label.values():
            if label.startswith(('B-', 'I-')):
                entities.add(label[2:])  # Remove prefix
            elif label != 'O':  # Skip the 'O' (Outside) label
                entities.add(label)

        return sorted(list(entities))

    def _print_available_entities(self, entities: List[str]):
        """Print all available entity types that can be detected by the model."""
        print("\nAvailable entity types:")
        print("=" * 25)
        for entity in entities:
            print(f"- {entity}")
        print("=" * 25 + "\n")

    def print_available_entities(self):
        """Print all available entity types that can be detected by the model."""
        self._print_available_entities(self.available_entities)

    def _detect_entities(self, text: str) -> Dict[str, List[str]]:
        """Detect entities in the text using the pipeline.

        Returns a mapping of entity type -> list of detected word spans,
        restricted to ``selected_entities``.
        """
        results = self._pipeline(text)

        # Group findings by entity type
        detected_entities = {}
        for entity in results:
            entity_type = entity['entity_group']
            if entity_type in self.selected_entities:
                if entity_type not in detected_entities:
                    detected_entities[entity_type] = []
                detected_entities[entity_type].append(entity['word'])

        return detected_entities

    def _anonymize_text(self, text: str, aggregate_redaction: bool = True) -> str:
        """Anonymize detected entities in text using the pipeline."""
        results = self._pipeline(text)

        # Sort entities by start position in reverse order to avoid offset issues
        entities = sorted(results, key=lambda x: x['start'], reverse=True)

        # Create a mutable list of characters
        chars = list(text)

        # Apply redactions
        for entity in entities:
            if entity['entity_group'] in self.selected_entities:
                start, end = entity['start'], entity['end']
                replacement = ' [redacted] ' if aggregate_redaction else f" [{entity['entity_group']}] "

                # Replace the entity with the redaction marker
                chars[start:end] = replacement

        # Join characters and clean up only consecutive spaces (preserving newlines)
        result = ''.join(chars)
        # Replace multiple spaces with single space, but preserve newlines
        lines = result.split('\n')
        cleaned_lines = [' '.join(line.split()) for line in lines]
        return '\n'.join(cleaned_lines)

    @weave.op()
    def guard(self, prompt: str, return_detected_types: bool = True, aggregate_redaction: bool = True) -> TransformersEntityRecognitionResponse | TransformersEntityRecognitionSimpleResponse:
        """Check if the input prompt contains any entities using the transformer pipeline.

        Args:
            prompt: The text to analyze
            return_detected_types: If True, returns detailed entity type information
            aggregate_redaction: If True, uses generic [redacted] instead of entity type
        """
        # Detect entities
        detected_entities = self._detect_entities(prompt)

        # Create explanation
        explanation_parts = []
        if detected_entities:
            explanation_parts.append("Found the following entities in the text:")
            for entity_type, instances in detected_entities.items():
                explanation_parts.append(f"- {entity_type}: {len(instances)} instance(s)")
        else:
            explanation_parts.append("No entities detected in the text.")

        explanation_parts.append("\nChecked for these entities:")
        for entity in self.selected_entities:
            explanation_parts.append(f"- {entity}")

        # Anonymize if requested
        anonymized_text = None
        if self.should_anonymize and detected_entities:
            anonymized_text = self._anonymize_text(prompt, aggregate_redaction)

        if return_detected_types:
            return TransformersEntityRecognitionResponse(
                contains_entities=bool(detected_entities),
                detected_entities=detected_entities,
                explanation="\n".join(explanation_parts),
                anonymized_text=anonymized_text
            )
        else:
            return TransformersEntityRecognitionSimpleResponse(
                contains_entities=bool(detected_entities),
                explanation="\n".join(explanation_parts),
                anonymized_text=anonymized_text
            )

    @weave.op()
    def predict(self, prompt: str, return_detected_types: bool = True, aggregate_redaction: bool = True, **kwargs) -> TransformersEntityRecognitionResponse | TransformersEntityRecognitionSimpleResponse:
        """Delegate to guard(); separate op so it can be traced/called as a model."""
        return self.guard(prompt, return_detected_types=return_detected_types, aggregate_redaction=aggregate_redaction, **kwargs)
guardrails_genie/guardrails/manager.py CHANGED
@@ -1,5 +1,6 @@
1
  import weave
2
  from rich.progress import track
 
3
 
4
  from .base import Guardrail
5
 
@@ -20,10 +21,12 @@ class GuardrailManager(weave.Model):
20
  alerts.append(
21
  {"guardrail_name": guardrail.__class__.__name__, "response": response}
22
  )
23
- safe = safe and response["safe"]
24
- summaries += (
25
- f"**{guardrail.__class__.__name__}**: {response['summary']}\n\n---\n\n"
26
- )
 
 
27
  return {"safe": safe, "alerts": alerts, "summary": summaries}
28
 
29
  @weave.op()
 
1
  import weave
2
  from rich.progress import track
3
+ from pydantic import BaseModel
4
 
5
  from .base import Guardrail
6
 
 
21
  alerts.append(
22
  {"guardrail_name": guardrail.__class__.__name__, "response": response}
23
  )
24
+ if isinstance(response, BaseModel):
25
+ safe = safe and response.safe
26
+ summaries += f"**{guardrail.__class__.__name__}**: {response.explanation}\n\n---\n\n"
27
+ else:
28
+ safe = safe and response["safe"]
29
+ summaries += f"**{guardrail.__class__.__name__}**: {response['summary']}\n\n---\n\n"
30
  return {"safe": safe, "alerts": alerts, "summary": summaries}
31
 
32
  @weave.op()
guardrails_genie/regex_model.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict, Optional
2
+ import re
3
+ import weave
4
+ from pydantic import BaseModel
5
+
6
+
7
+ class RegexResult(BaseModel):
8
+ passed: bool
9
+ matched_patterns: Dict[str, List[str]]
10
+ failed_patterns: List[str]
11
+
12
+
13
class RegexModel(weave.Model):
    """Screens text against a dictionary of named regex patterns.

    A text "passes" when none of the patterns match (i.e. no PII-like
    content was found).

    Attributes:
        patterns: Mapping from a human-readable pattern name (e.g. "email")
            to its regex source string.
    """

    patterns: Dict[str, str]

    def __init__(self, patterns: Dict[str, str]) -> None:
        """
        Initialize RegexModel with a dictionary of patterns.

        Args:
            patterns: Dictionary where key is pattern name and value is regex pattern
                      Example: {"email": r"[^@ \t\r\n]+@[^@ \t\r\n]+\.[^@ \t\r\n]+",
                                "phone": r"\b\d{3}[-.]?\d{3}[-.]?\d{4}\b"}
        """
        super().__init__(patterns=patterns)
        # Compile once up front so check() does not recompile per call.
        self._compiled_patterns = {
            name: re.compile(pattern) for name, pattern in patterns.items()
        }

    @weave.op()
    def check(self, text: str) -> RegexResult:
        """
        Check text against all patterns and return detailed results.

        Args:
            text: Input text to check against patterns

        Returns:
            RegexResult containing pass/fail status and details about matches
        """
        matches: Dict[str, List[str]] = {}
        failed_patterns: List[str] = []

        for pattern_name, compiled_pattern in self._compiled_patterns.items():
            # Use finditer + group(0) rather than findall: findall returns the
            # capture-group contents (not the full match) whenever the pattern
            # contains groups, which would report fragments of the matched text.
            found_matches = [m.group(0) for m in compiled_pattern.finditer(text)]
            if found_matches:
                matches[pattern_name] = found_matches
            else:
                failed_patterns.append(pattern_name)

        # Consider it passed only if no patterns matched (no PII found).
        passed = len(matches) == 0

        return RegexResult(
            passed=passed,
            matched_patterns=matches,
            failed_patterns=failed_patterns
        )

    @weave.op()
    def predict(self, text: str) -> RegexResult:
        """
        Alias for check() to maintain consistency with other models.
        """
        return self.check(text)
pyproject.toml CHANGED
@@ -20,6 +20,8 @@ dependencies = [
20
  "pymupdf4llm>=0.0.17",
21
  "transformers>=4.46.3",
22
  "torch>=2.5.1",
 
 
23
  ]
24
 
25
  [tool.setuptools]
 
20
  "pymupdf4llm>=0.0.17",
21
  "transformers>=4.46.3",
22
  "torch>=2.5.1",
23
+ "presidio-analyzer>=2.2.355",
24
+ "presidio-anonymizer>=2.2.355",
25
  ]
26
 
27
  [tool.setuptools]