Spaces:
Running
Running
import weave | |
from pydantic import BaseModel | |
from rich.progress import track | |
from .base import Guardrail | |
class GuardrailManager(weave.Model):
    """
    Run a list of guardrails against a prompt and merge their verdicts.

    The manager is a `weave.Model` subclass holding the guardrails to apply.
    `guard` calls each guardrail's own `guard` method in list order and folds
    the individual responses into one report; `predict` performs the same
    evaluation with the progress bar disabled.

    Attributes:
        guardrails (list[Guardrail]): The guardrails to apply, in the order
            they are run.
    """

    guardrails: list[Guardrail]

    def guard(self, prompt: str, progress_bar: bool = True, **kwargs) -> dict:
        """
        Evaluate `prompt` with every guardrail and return a combined report.

        Each guardrail's `guard` method is called with the prompt and the
        extra keyword arguments. A response is handled in one of two ways:

        - an instance of pydantic's `BaseModel`: its `safe` and `explanation`
          attributes are read;
        - anything else: it is subscripted with the keys `"safe"` and
          `"summary"`.

        The prompt is reported as safe only if every response read so far is
        truthy for "safe". Once one guardrail reports the prompt as unsafe,
        the "safe" value of later responses is no longer read (the `and`
        short-circuits), but those guardrails still run and still contribute
        an alert and a summary entry.

        Args:
            prompt (str): The input prompt to be evaluated by the guardrails.
            progress_bar (bool, optional): If True, wraps the guardrail list
                in `rich.progress.track` so a progress bar is shown while
                iterating. Defaults to True.
            **kwargs: Additional keyword arguments forwarded unchanged to each
                guardrail's `guard` method.

        Returns:
            dict: A dictionary containing:
                - "safe" (bool): False if any guardrail reported the prompt
                    as unsafe, True otherwise (including when there are no
                    guardrails).
                - "alerts" (list): One dictionary per guardrail with the keys
                    "guardrail_name" (the guardrail's class name) and
                    "response" (the raw response object).
                - "summary" (str): Markdown text with one
                    `**ClassName**: <text>` entry per guardrail, each
                    followed by a horizontal-rule separator.
        """
        alerts, summary_parts, safe = [], [], True
        iterable = (
            track(self.guardrails, description="Running guardrails")
            if progress_bar
            else self.guardrails
        )
        for guardrail in iterable:
            response = guardrail.guard(prompt, **kwargs)
            name = guardrail.__class__.__name__
            alerts.append({"guardrail_name": name, "response": response})
            # bool() keeps the documented "safe" type even when a guardrail
            # reports its verdict as None/0/1 instead of a real boolean.
            if isinstance(response, BaseModel):
                safe = bool(safe and response.safe)
                text = response.explanation
            else:
                safe = bool(safe and response["safe"])
                text = response["summary"]
            summary_parts.append(f"**{name}**: {text}\n\n---\n\n")
        # Join once at the end rather than growing a string inside the loop.
        return {"safe": safe, "alerts": alerts, "summary": "".join(summary_parts)}

    def predict(self, prompt: str, **kwargs) -> dict:
        """
        Evaluate `prompt` with every guardrail without showing a progress bar.

        This is a thin wrapper that delegates to `guard` with
        `progress_bar=False`.

        Args:
            prompt (str): The input prompt to be evaluated by the guardrails.
            **kwargs: Additional keyword arguments forwarded unchanged to each
                guardrail's `guard` method.

        Returns:
            dict: The same report `guard` returns, with the keys "safe"
                (bool), "alerts" (list) and "summary" (str).
        """
        return self.guard(prompt, progress_bar=False, **kwargs)