asofter committed on
Commit
6b80b1f
β€’
1 Parent(s): e8cf854

* small model support

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +22 -24
  3. requirements.txt +7 -7
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: πŸ“
4
  colorFrom: yellow
5
  colorTo: gray
6
  sdk: gradio
7
- sdk_version: 4.31.2
8
  pinned: true
9
  license: apache-2.0
10
  ---
 
4
  colorFrom: yellow
5
  colorTo: gray
6
  sdk: gradio
7
+ sdk_version: 4.39.0
8
  pinned: true
9
  license: apache-2.0
10
  ---
app.py CHANGED
@@ -21,7 +21,8 @@ from transformers import AutoTokenizer, pipeline
21
  logging.basicConfig(level=logging.INFO)
22
  logger = logging.getLogger(__name__)
23
 
24
- hf_api = HfApi(token=os.getenv("HF_TOKEN"))
 
25
  num_processes = 2 # mp.cpu_count()
26
 
27
  lakera_api_key = os.getenv("LAKERA_API_KEY")
@@ -35,9 +36,15 @@ aws_comprehend_client = boto3.client(service_name="comprehend", region_name="us-
35
  @lru_cache(maxsize=2)
36
  def init_prompt_injection_model(prompt_injection_ort_model: str, subfolder: str = "") -> pipeline:
37
  hf_model = ORTModelForSequenceClassification.from_pretrained(
38
- prompt_injection_ort_model, export=False, subfolder=subfolder, file_name="model.onnx"
 
 
 
 
 
 
 
39
  )
40
- hf_tokenizer = AutoTokenizer.from_pretrained(prompt_injection_ort_model, subfolder=subfolder)
41
  hf_tokenizer.model_input_names = ["input_ids", "attention_mask"]
42
 
43
  logger.info(f"Initialized classification ONNX model {prompt_injection_ort_model} on CPU")
@@ -58,14 +65,17 @@ def convert_elapsed_time(diff_time) -> float:
58
 
59
 
60
  deepset_classifier = init_prompt_injection_model(
61
- "ProtectAI/deberta-v3-base-injection-onnx"
62
  ) # ONNX version of deepset/deberta-v3-base-injection
63
  protectai_v2_classifier = init_prompt_injection_model(
64
- "ProtectAI/deberta-v3-base-prompt-injection-v2", "onnx"
65
  )
66
  fmops_classifier = init_prompt_injection_model(
67
- "ProtectAI/fmops-distilbert-prompt-injection-onnx"
68
  ) # ONNX version of fmops/distilbert-prompt-injection
 
 
 
69
 
70
 
71
  def detect_hf(
@@ -93,6 +103,10 @@ def detect_hf_protectai_v2(prompt: str) -> (bool, bool):
93
  return detect_hf(prompt, classifier=protectai_v2_classifier)
94
 
95
 
 
 
 
 
96
  def detect_hf_deepset(prompt: str) -> (bool, bool):
97
  return detect_hf(prompt, classifier=deepset_classifier)
98
 
@@ -153,23 +167,6 @@ def detect_aws_comprehend(prompt: str) -> (bool, bool):
153
  EndpointArn="arn:aws:comprehend:us-east-1:aws:document-classifier-endpoint/prompt-safety",
154
  Text=prompt,
155
  )
156
- response = {
157
- "Classes": [
158
- {"Name": "SAFE_PROMPT", "Score": 0.9010000228881836},
159
- {"Name": "UNSAFE_PROMPT", "Score": 0.0989999994635582},
160
- ],
161
- "ResponseMetadata": {
162
- "RequestId": "e8900fe1-3346-45c0-bad3-007b2840865a",
163
- "HTTPStatusCode": 200,
164
- "HTTPHeaders": {
165
- "x-amzn-requestid": "e8900fe1-3346-45c0-bad3-007b2840865a",
166
- "content-type": "application/x-amz-json-1.1",
167
- "content-length": "115",
168
- "date": "Mon, 19 Feb 2024 08:34:43 GMT",
169
- },
170
- "RetryAttempts": 0,
171
- },
172
- }
173
  logger.info(f"Prompt injection result from AWS Comprehend: {response}")
174
  if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
175
  logger.error(f"Failed to call AWS Comprehend API: {response}")
@@ -209,13 +206,14 @@ def detect_sydelabs(prompt: str) -> (bool, bool):
209
 
210
  detection_providers = {
211
  "ProtectAI v2 (HF model)": detect_hf_protectai_v2,
 
212
  "Deepset (HF model)": detect_hf_deepset,
213
  "FMOps (HF model)": detect_hf_fmops,
214
  "Lakera Guard": detect_lakera,
215
  # "Rebuff": detect_rebuff,
216
  "Azure Content Safety": detect_azure,
217
  "SydeLabs": detect_sydelabs,
218
- # "AWS Comprehend": detect_aws_comprehend,
219
  }
220
 
221
 
 
21
  logging.basicConfig(level=logging.INFO)
22
  logger = logging.getLogger(__name__)
23
 
24
+ hf_token = os.getenv("HF_TOKEN")
25
+ hf_api = HfApi(token=hf_token)
26
  num_processes = 2 # mp.cpu_count()
27
 
28
  lakera_api_key = os.getenv("LAKERA_API_KEY")
 
36
  @lru_cache(maxsize=2)
37
  def init_prompt_injection_model(prompt_injection_ort_model: str, subfolder: str = "") -> pipeline:
38
  hf_model = ORTModelForSequenceClassification.from_pretrained(
39
+ prompt_injection_ort_model,
40
+ export=False,
41
+ subfolder=subfolder,
42
+ file_name="model.onnx",
43
+ token=hf_token,
44
+ )
45
+ hf_tokenizer = AutoTokenizer.from_pretrained(
46
+ prompt_injection_ort_model, subfolder=subfolder, token=hf_token
47
  )
 
48
  hf_tokenizer.model_input_names = ["input_ids", "attention_mask"]
49
 
50
  logger.info(f"Initialized classification ONNX model {prompt_injection_ort_model} on CPU")
 
65
 
66
 
67
  deepset_classifier = init_prompt_injection_model(
68
+ "protectai/deberta-v3-base-injection-onnx"
69
  ) # ONNX version of deepset/deberta-v3-base-injection
70
  protectai_v2_classifier = init_prompt_injection_model(
71
+ "protectai/deberta-v3-base-prompt-injection-v2", "onnx"
72
  )
73
  fmops_classifier = init_prompt_injection_model(
74
+ "protectai/fmops-distilbert-prompt-injection-onnx"
75
  ) # ONNX version of fmops/distilbert-prompt-injection
76
+ protectai_v2_small_classifier = init_prompt_injection_model(
77
+ "protectai/deberta-v3-small-prompt-injection-v2", "onnx"
78
+ ) # ONNX version of protectai/deberta-v3-small-prompt-injection-v2
79
 
80
 
81
  def detect_hf(
 
103
  return detect_hf(prompt, classifier=protectai_v2_classifier)
104
 
105
 
106
+ def detect_hf_protectai_v2_small(prompt: str) -> (bool, bool):
107
+ return detect_hf(prompt, classifier=protectai_v2_small_classifier)
108
+
109
+
110
  def detect_hf_deepset(prompt: str) -> (bool, bool):
111
  return detect_hf(prompt, classifier=deepset_classifier)
112
 
 
167
  EndpointArn="arn:aws:comprehend:us-east-1:aws:document-classifier-endpoint/prompt-safety",
168
  Text=prompt,
169
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  logger.info(f"Prompt injection result from AWS Comprehend: {response}")
171
  if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
172
  logger.error(f"Failed to call AWS Comprehend API: {response}")
 
206
 
207
  detection_providers = {
208
  "ProtectAI v2 (HF model)": detect_hf_protectai_v2,
209
+ "ProtectAI v2 Small (HF model)": detect_hf_protectai_v2_small,
210
  "Deepset (HF model)": detect_hf_deepset,
211
  "FMOps (HF model)": detect_hf_fmops,
212
  "Lakera Guard": detect_lakera,
213
  # "Rebuff": detect_rebuff,
214
  "Azure Content Safety": detect_azure,
215
  "SydeLabs": detect_sydelabs,
216
+ "AWS Comprehend": detect_aws_comprehend,
217
  }
218
 
219
 
requirements.txt CHANGED
@@ -1,8 +1,8 @@
1
- boto3==1.34.104
2
- gradio==4.31.2
3
- huggingface_hub==0.23.0
4
- onnxruntime==1.17.3
5
- optimum[onnxruntime]==1.19.2
6
  rebuff==0.1.1
7
- requests==2.31.0
8
- transformers==4.39.3
 
1
+ boto3==1.34.146
2
+ gradio==4.39.0
3
+ huggingface_hub==0.24.0
4
+ onnxruntime==1.18.1
5
+ optimum[onnxruntime]==1.21.2
6
  rebuff==0.1.1
7
+ requests==2.32.3
8
+ transformers==4.42.4