Files changed (1)
  1. app.py +8 -7
app.py CHANGED
@@ -51,12 +51,12 @@ set_seed(13)
 print(f"Starting to load the model to memory")
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
-HF_TOKEN = os.getenv("HF_TOKEN")
+HF_TOKEN = os.getenv("HF_Token")
 print(HF_TOKEN)
 
 
 m = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2b-it",
+    "ibm-granite/granite-3.0-2b-instruct",
     torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
     trust_remote_code=True,token=HF_TOKEN
 )
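Note on this hunk: the new `os.getenv("HF_Token")` lookup only succeeds if the Space secret is stored under that exact capitalization, and the `print(HF_TOKEN)` context line writes the raw secret into the Space logs. A minimal defensive sketch (the fallback chain across both spellings is an assumption about how the secret may be named, not part of this change):

```python
import os

# Sketch only: os.environ keys are case-sensitive, so "HF_Token" and
# "HF_TOKEN" are distinct keys; checking both is an assumption.
HF_TOKEN = os.getenv("HF_Token") or os.getenv("HF_TOKEN")
if HF_TOKEN is None:
    raise RuntimeError("no Hugging Face token found in the environment")
# Avoid echoing the raw secret into the logs; print a masked prefix instead.
print(f"HF token loaded: {HF_TOKEN[:4]}***")
```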
@@ -64,7 +64,7 @@ m = AutoModelForCausalLM.from_pretrained(
 embedding_func=m.get_input_embeddings()
 embedding_func.weight.requires_grad=False
 
-tok = AutoTokenizer.from_pretrained("google/gemma-2b-it",
+tok = AutoTokenizer.from_pretrained("ibm-granite/granite-3.0-2b-instruct",
     trust_remote_code=True,token=HF_TOKEN
 )
 tok.padding_side = "left"
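The `tok.padding_side = "left"` context line matters for a decoder-only model like this one: with right padding, `generate()` would have to continue from pad tokens instead of the end of each prompt. A small illustration under that assumption (the `prompts` list and the pad-token fallback are illustrative, not from the file):

```python
import torch

# With left padding, pad tokens sit in front of shorter prompts, so every
# sequence ends at the same position and generation continues from real text.
prompts = ["Hi!", "Summarize the Gradient Cuff defense in one sentence."]
if tok.pad_token is None:
    tok.pad_token = tok.eos_token  # common fallback when no pad token is defined
enc = tok(prompts, padding=True, return_tensors="pt").to(m.device)
with torch.no_grad():
    out = m.generate(**enc, max_new_tokens=16, pad_token_id=tok.pad_token_id)
print(tok.batch_decode(out, skip_special_tokens=True))
```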
@@ -100,7 +100,7 @@ def embedding_shift(original_embedding,shift_embeddings,prefix_embedding,suffix_
     )
     return input_embeddings
 
-@spaces.GPU(duration=10)
+@spaces.GPU(duration=30)
 def engine(input_embeds):
     m.to("cuda")
     output_text = []
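`@spaces.GPU` is the ZeroGPU decorator from the `spaces` package that Hugging Face provides on ZeroGPU hardware: `duration` is how many seconds of GPU time one decorated call may hold, so the bump from 10 to 30 gives the sampled, perturbed generations more headroom. A hedged sketch of the pattern (the function name `run_on_gpu` is hypothetical):

```python
import spaces
import torch

# duration is the per-call GPU time budget in seconds; generation must
# finish within that window or the call is cut off.
@spaces.GPU(duration=30)
def run_on_gpu(input_embeds):
    m.to("cuda")  # the model only needs to live on the GPU inside the call
    with torch.no_grad():
        out = m.generate(inputs_embeds=input_embeds.to("cuda"), max_new_tokens=32)
    return tok.batch_decode(out, skip_special_tokens=True)
```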
@@ -185,8 +185,9 @@ def gradient_cuff_reject(message,with_defense, sample_times,perturb_times,thresh
 
     return (False,1-results[0],est_grad.norm().item(),original_response)
 
-def chat(message, history, with_defense,threshold):
-    perturb_times=9
+def chat(message, history, with_defense):
+    threshold=75
+    perturb_times=10
     sample_times=10
     #threshold=thresholds[perturb_times-1]
     return_value=gradient_cuff_reject(message,with_defense, sample_times, perturb_times, threshold)
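The new `chat` fixes all three Gradient Cuff hyper-parameters instead of taking the threshold from the UI. Judging from the `return` statement of `gradient_cuff_reject` visible above, the function hands back a 4-tuple `(rejected, refusal_rate, gradient_norm, response)`; a hedged sketch of how the rest of `chat` might consume it (every name below, including the rejection message, is illustrative):

```python
# Hypothetical continuation of the new chat(); the tuple layout is read off
# the `return (False, 1-results[0], ...)` line above.
def chat(message, history, with_defense):
    threshold = 75      # stage-2 detection threshold, now hardcoded
    perturb_times = 10  # P: perturbation vectors per gradient estimate
    sample_times = 10   # N: sampled responses per query
    rejected, refusal_rate, grad_norm, response = gradient_cuff_reject(
        message, with_defense, sample_times, perturb_times, threshold
    )
    if rejected:
        return "[Gradient Cuff] Query rejected as a likely jailbreak attempt."
    return response
```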
@@ -225,7 +226,7 @@ def chat(message, history, with_defense,threshold):
 add_inputs_name=gr.Accordion(label="Defense Parameters", open=True)
 add_inputs=[
     gr.Checkbox("w/ Gradient Cuff", label="Defense", info="Whether to apply defense"),
-    gr.Slider(minimum=0, maximum=1000, step=1, value=100, label="t - Threshold", info = "The detection threshold used in the 2nd stage.")
+    #gr.Slider(minimum=0, maximum=1000, step=1, value=100, label="t - Threshold", info = "The detection threshold used in the 2nd stage.")
     #gr.Slider(minimum=0, maximum=10, step=1, value=2, label="P - Perturb times", info = "The number of the perturbation vectors used to estimate the gradient.")
 ]
 #######################################################################################
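With the threshold slider commented out, `add_inputs` carries only the checkbox, which matches the single extra parameter of the new `chat(message, history, with_defense)`. The usual Gradio wiring looks roughly like this (a sketch; the actual launch code sits outside this hunk):

```python
# Assumed wiring, not shown in this diff: gr.ChatInterface passes each
# element of additional_inputs as one extra positional argument to `chat`,
# which is why removing the slider lets chat() drop its threshold parameter.
demo = gr.ChatInterface(
    fn=chat,
    additional_inputs=add_inputs,
    additional_inputs_accordion=add_inputs_name,
)
demo.launch()
```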
 