Files changed (1)
  1. app.py +8 -7
app.py CHANGED
@@ -51,12 +51,12 @@ set_seed(13)
 print(f"Starting to load the model to memory")
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
-HF_TOKEN = os.getenv("HF_TOKEN")
+HF_TOKEN = os.getenv("HF_Token")
 print(HF_TOKEN)
 
 
 m = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2b-it",
+    "ibm-granite/granite-3.0-2b-instruct",
     torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
     trust_remote_code=True,token=HF_TOKEN
 )
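Note on this hunk: the new `os.getenv("HF_Token")` lookup only succeeds if the Space secret is stored under that exact capitalization, and the `print(HF_TOKEN)` context line writes the raw secret into the Space logs. A minimal defensive sketch (the fallback chain across both spellings is an assumption about how the secret may be named, not part of this change):

```python
import os

# Sketch only: os.environ keys are case-sensitive, so "HF_Token" and
# "HF_TOKEN" are distinct keys; checking both is an assumption.
HF_TOKEN = os.getenv("HF_Token") or os.getenv("HF_TOKEN")
if HF_TOKEN is None:
    raise RuntimeError("no Hugging Face token found in the environment")
# Avoid echoing the raw secret into the logs; print a masked prefix instead.
print(f"HF token loaded: {HF_TOKEN[:4]}***")
```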
@@ -64,7 +64,7 @@ m = AutoModelForCausalLM.from_pretrained(
 embedding_func=m.get_input_embeddings()
 embedding_func.weight.requires_grad=False
 
-tok = AutoTokenizer.from_pretrained("google/gemma-2b-it",
+tok = AutoTokenizer.from_pretrained("ibm-granite/granite-3.0-2b-instruct",
     trust_remote_code=True,token=HF_TOKEN
 )
 tok.padding_side = "left"
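The `tok.padding_side = "left"` context line matters for a decoder-only model like this one: with right padding, `generate()` would have to continue from pad tokens instead of the end of each prompt. A small illustration under that assumption (the `prompts` list and the pad-token fallback are illustrative, not from the file):

```python
import torch

# With left padding, pad tokens sit in front of shorter prompts, so every
# sequence ends at the same position and generation continues from real text.
prompts = ["Hi!", "Summarize the Gradient Cuff defense in one sentence."]
if tok.pad_token is None:
    tok.pad_token = tok.eos_token  # common fallback when no pad token is defined
enc = tok(prompts, padding=True, return_tensors="pt").to(m.device)
with torch.no_grad():
    out = m.generate(**enc, max_new_tokens=16, pad_token_id=tok.pad_token_id)
print(tok.batch_decode(out, skip_special_tokens=True))
```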
@@ -100,7 +100,7 @@ def embedding_shift(original_embedding,shift_embeddings,prefix_embedding,suffix_
     )
     return input_embeddings
 
-@spaces.GPU(duration=10)
+@spaces.GPU(duration=30)
 def engine(input_embeds):
     m.to("cuda")
     output_text = []
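`@spaces.GPU` is the ZeroGPU decorator from the `spaces` package that Hugging Face provides on ZeroGPU hardware: `duration` is how many seconds of GPU time one decorated call may hold, so the bump from 10 to 30 gives the sampled, perturbed generations more headroom. A hedged sketch of the pattern (the function name `run_on_gpu` is hypothetical):

```python
import spaces
import torch

# duration is the per-call GPU time budget in seconds; generation must
# finish within that window or the call is cut off.
@spaces.GPU(duration=30)
def run_on_gpu(input_embeds):
    m.to("cuda")  # the model only needs to live on the GPU inside the call
    with torch.no_grad():
        out = m.generate(inputs_embeds=input_embeds.to("cuda"), max_new_tokens=32)
    return tok.batch_decode(out, skip_special_tokens=True)
```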
@@ -185,8 +185,9 @@ def gradient_cuff_reject(message,with_defense, sample_times,perturb_times,thresh
 
     return (False,1-results[0],est_grad.norm().item(),original_response)
 
-def chat(message, history, with_defense,threshold):
-    perturb_times=9
+def chat(message, history, with_defense):
+    threshold=75
+    perturb_times=10
     sample_times=10
     #threshold=thresholds[perturb_times-1]
     return_value=gradient_cuff_reject(message,with_defense, sample_times, perturb_times, threshold)
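The new `chat` fixes all three Gradient Cuff hyper-parameters instead of taking the threshold from the UI. Judging from the `return` statement of `gradient_cuff_reject` visible above, the function hands back a 4-tuple `(rejected, refusal_rate, gradient_norm, response)`; a hedged sketch of how the rest of `chat` might consume it (every name below, including the rejection message, is illustrative):

```python
# Hypothetical continuation of the new chat(); the tuple layout is read off
# the `return (False, 1-results[0], ...)` line above.
def chat(message, history, with_defense):
    threshold = 75      # stage-2 detection threshold, now hardcoded
    perturb_times = 10  # P: perturbation vectors per gradient estimate
    sample_times = 10   # N: sampled responses per query
    rejected, refusal_rate, grad_norm, response = gradient_cuff_reject(
        message, with_defense, sample_times, perturb_times, threshold
    )
    if rejected:
        return "[Gradient Cuff] Query rejected as a likely jailbreak attempt."
    return response
```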
@@ -225,7 +226,7 @@ def chat(message, history, with_defense,threshold):
 add_inputs_name=gr.Accordion(label="Defense Parameters", open=True)
 add_inputs=[
     gr.Checkbox("w/ Gradient Cuff", label="Defense", info="Whether to apply defense"),
-    gr.Slider(minimum=0, maximum=1000, step=1, value=100, label="t - Threshold", info = "The detection threshold used in the 2nd stage.")
+    #gr.Slider(minimum=0, maximum=1000, step=1, value=100, label="t - Threshold", info = "The detection threshold used in the 2nd stage.")
     #gr.Slider(minimum=0, maximum=10, step=1, value=2, label="P - Perturb times", info = "The number of the perturbation vectors used to estimate the gradient.")
 ]
 #######################################################################################
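With the threshold slider commented out, `add_inputs` carries only the checkbox, which matches the single extra parameter of the new `chat(message, history, with_defense)`. The usual Gradio wiring looks roughly like this (a sketch; the actual launch code sits outside this hunk):

```python
# Assumed wiring, not shown in this diff: gr.ChatInterface passes each
# element of additional_inputs as one extra positional argument to `chat`,
# which is why removing the slider lets chat() drop its threshold parameter.
demo = gr.ChatInterface(
    fn=chat,
    additional_inputs=add_inputs,
    additional_inputs_accordion=add_inputs_name,
)
demo.launch()
```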
 