# Spaces: wandb/guardrails-genie (Running) - File size: 6,312 Bytes

import hashlib
import json
import pathlib
import re
from enum import Enum
from typing import Optional, Union

import weave
from pydantic import BaseModel

from guardrails_genie.guardrails.base import Guardrail
from guardrails_genie.regex_model import RegexModel


def load_secrets_patterns() -> dict[str, list[str]]:
    """
    Load secret patterns from a JSONL file and return them as a dictionary.

    The file ``secrets_patterns.jsonl`` is read from the directory that contains
    this module. Each non-blank line must be a JSON object with a ``"name"`` key
    and a ``"patterns"`` key (a list of regex strings). Blank or whitespace-only
    lines are skipped. If the same name appears on several lines, the last one wins.

    Returns:
        dict: A dictionary where keys are pattern names and values are lists of regex patterns.

    Raises:
        FileNotFoundError: If ``secrets_patterns.jsonl`` does not exist next to this module.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks the ``"name"`` or ``"patterns"`` key.
    """
    patterns_file = pathlib.Path(__file__).parent.absolute() / "secrets_patterns.jsonl"
    default_patterns: dict[str, list[str]] = {}

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with patterns_file.open(encoding="utf-8") as handle:
        for line in handle:
            # Tolerate blank lines (e.g. a trailing newline at the end of the file),
            # which json.loads would otherwise reject.
            if not line.strip():
                continue
            entry = json.loads(line)
            default_patterns[entry["name"]] = [str(pat) for pat in entry["patterns"]]
    return default_patterns


# Default secret patterns, loaded once at import time from the JSONL file that
# sits next to this module. Maps pattern name -> list of regex strings.
DEFAULT_SECRETS_PATTERNS = load_secrets_patterns()


class REDACTION(str, Enum):
    """
    Enum for different types of redaction methods.

    Members are also plain strings (the class mixes in ``str``), so each member
    compares equal to its own name, e.g. ``REDACTION.REDACT_ALL == "REDACT_ALL"``.
    The effect of each member is implemented by ``redact``.
    """

    # Keep at most the first two and last two characters of each match visible.
    REDACT_PARTIAL = "REDACT_PARTIAL"
    # Replace each match with asterisks of the same length.
    REDACT_ALL = "REDACT_ALL"
    # Replace each match with the hex MD5 digest of the match.
    REDACT_HASH = "REDACT_HASH"
    # Leave the text unchanged.
    REDACT_NONE = "REDACT_NONE"


def redact(text: str, matches: list[str], redaction_type: REDACTION) -> str:
    """
    Redact the given matches in the text based on the redaction type.

    Every occurrence of every match is replaced in a single pass over ``text``,
    so text produced by one replacement is never rewritten by another. Where
    several matches could start at the same position, the longest one wins, so
    a short match cannot split a longer secret that contains it.

    Args:
        text (str): The input text to redact.
        matches (list[str]): Literal strings to be redacted. Empty strings and
            duplicates are ignored.
        redaction_type (REDACTION): The type of redaction to apply.

    Returns:
        str: The redacted text. Each redacted secret is wrapped as
        ``[REDACTED:]...[:REDACTED]``. The text is returned unchanged for
        ``REDACTION.REDACT_NONE`` (or any unrecognised redaction type) and when
        there is nothing to redact.
    """
    if redaction_type not in (
        REDACTION.REDACT_PARTIAL,
        REDACTION.REDACT_ALL,
        REDACTION.REDACT_HASH,
    ):
        return text

    # Drop empty strings (an empty needle matches at every position and would
    # scatter markers through the whole text) and duplicates, then order
    # longest-first so the alternation below prefers the longest secret.
    secrets = sorted(dict.fromkeys(m for m in matches if m), key=len, reverse=True)
    if not secrets:
        return text

    visible = 2  # characters kept at each end by REDACT_PARTIAL

    def _mask(found: re.Match) -> str:
        """Build the replacement for one matched secret."""
        secret = found.group(0)
        if redaction_type == REDACTION.REDACT_PARTIAL:
            if len(secret) <= 2 * visible:
                # Prefix + suffix would reveal the entire secret; mask it all.
                body = "*" * len(secret)
            else:
                body = secret[:visible] + ".." + secret[-visible:]
        elif redaction_type == REDACTION.REDACT_ALL:
            body = "*" * len(secret)
        else:
            # REDACT_HASH. NOTE(review): MD5 is kept so existing output stays
            # stable; an unsalted digest of a low-entropy secret can be
            # brute-forced, so treat it as a fingerprint, not as protection.
            body = hashlib.md5(secret.encode()).hexdigest()
        return "[REDACTED:]" + body + "[:REDACTED]"

    # Secrets are literal strings, not regexes, so escape them. Using a callable
    # replacement also avoids backslash processing of the replacement text.
    literal_alternation = "|".join(re.escape(secret) for secret in secrets)
    return re.sub(literal_alternation, _mask, text)


class SecretsDetectionSimpleResponse(BaseModel):
    """
    A simple response model for secrets detection.

    Carries the verdict, a human-readable explanation and (optionally) the
    redacted text, without listing the secrets that were matched.

    Attributes:
        contains_secrets (bool): Indicates if secrets were detected.
        explanation (str): Explanation of the detection result.
        redacted_text (Optional[str]): The text after redaction; defaults to
            None when no redacted text is supplied.
    """

    contains_secrets: bool
    explanation: str
    redacted_text: Optional[str] = None

    @property
    def safe(self) -> bool:
        """
        Property to check if the text is safe (no secrets detected).

        This is simply the negation of ``contains_secrets``.

        Returns:
            bool: True if no secrets were detected, False otherwise.
        """
        return not self.contains_secrets


class SecretsDetectionResponse(SecretsDetectionSimpleResponse):
    """
    A detailed response model for secrets detection.

    Extends SecretsDetectionSimpleResponse with the matches themselves.

    Attributes:
        detected_secrets (dict[str, list[str]]): Dictionary of detected secrets,
            keyed by secret type with the list of matches for that type.
    """

    # NOTE(review): these appear to be the raw, unredacted matched strings
    # (the same strings are passed to `redact` to be replaced in the text) —
    # handle and log this field with care.
    detected_secrets: dict[str, list[str]]


class SecretsDetectionGuardrail(Guardrail):
    """
    A guardrail for detecting secrets in text using regex patterns.
    reference: SecretBench: A Dataset of Software Secrets
    https://arxiv.org/abs/2303.06729

    Attributes:
        regex_model (RegexModel): The regex model used for detection.
        patterns (Union[dict[str, str], dict[str, list[str]]]): The patterns used for detection.
        redaction (REDACTION): The type of redaction to apply.
    """

    regex_model: RegexModel
    patterns: Union[dict[str, str], dict[str, list[str]]] = {}
    redaction: REDACTION

    def __init__(
        self,
        use_defaults: bool = True,
        redaction: REDACTION = REDACTION.REDACT_ALL,
        **kwargs,
    ):
        """
        Initialize the SecretsDetectionGuardrail.

        Args:
            use_defaults (bool): Whether to start from a copy of
                DEFAULT_SECRETS_PATTERNS.
            redaction (REDACTION): The type of redaction to apply.
            **kwargs: Only the ``patterns`` key is read. When present and
                non-empty, its entries are merged on top of the defaults
                (same-named entries override them). Other keyword arguments
                are ignored.
        """
        # Copy so that merging custom patterns never mutates the module-level defaults.
        patterns = DEFAULT_SECRETS_PATTERNS.copy() if use_defaults else {}
        custom_patterns = kwargs.get("patterns")
        if custom_patterns:
            patterns.update(custom_patterns)

        regex_model = RegexModel(patterns=patterns)

        super().__init__(
            regex_model=regex_model,
            patterns=patterns,
            redaction=redaction,
        )

    @weave.op()
    def guard(
        self,
        prompt: str,
        return_detected_secrets: bool = True,
        **kwargs,
    ) -> SecretsDetectionResponse | SecretsDetectionSimpleResponse:
        """
        Check if the input prompt contains any secrets based on the regex patterns.

        Args:
            prompt (str): Input text to check for secrets.
            return_detected_secrets (bool): If True, returns detailed secrets type information.
            **kwargs: Accepted but not used by this method.

        Returns:
            SecretsDetectionResponse or SecretsDetectionSimpleResponse: Detection
            results. The detailed SecretsDetectionResponse (which includes the
            matches per secret type) is returned when ``return_detected_secrets``
            is True; otherwise a SecretsDetectionSimpleResponse. In both cases
            ``redacted_text`` is the prompt with all matches redacted according
            to ``self.redaction`` (the prompt itself when nothing matched).
        """
        result = self.regex_model.check(prompt)
        detected = result.matched_patterns

        if detected:
            explanation_parts = ["Found the following secrets in the text:"]
            explanation_parts.extend(
                f"- {secret_type}: {len(found)} instance(s)"
                for secret_type, found in detected.items()
            )
            # Redact every match from every secret type in one call, so all
            # matches are considered together rather than type by type.
            all_matches = [match for found in detected.values() for match in found]
            redacted_text = redact(prompt, all_matches, self.redaction)
        else:
            explanation_parts = ["No secrets detected in the text."]
            redacted_text = prompt

        explanation = "\n".join(explanation_parts)
        contains_secrets = not result.passed

        if return_detected_secrets:
            return SecretsDetectionResponse(
                contains_secrets=contains_secrets,
                detected_secrets=detected,
                explanation=explanation,
                redacted_text=redacted_text,
            )
        return SecretsDetectionSimpleResponse(
            contains_secrets=contains_secrets,
            explanation=explanation,
            redacted_text=redacted_text,
        )