File size: 7,103 Bytes
dc68796 afc7627 dc68796 afc7627 dc68796 afc7627 dc68796 b375146 dc68796 694cb16 82428f3 dc68796 82428f3 2751a3d 82428f3 eacf5c6 dc68796 82428f3 41c2afe 82428f3 694cb16 2751a3d 82428f3 41c2afe 82428f3 41c2afe af2b329 dc68796 82428f3 dc68796 82428f3 d258ead 82428f3 e484946 82428f3 dc68796 8fb2f84 af2b329 9efd8ca dc68796 82428f3 dc68796 8fb2f84 af2b329 8fb2f84 82428f3 8fb2f84 af2b329 9efd8ca dc68796 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
$(document).ready(function(){
// Clicking a not-yet-selected <span> inside #jailbreak-intro marks it as the
// selected one and points the intro image at the picture named after the
// span's text.
$('#jailbreak-intro').on('click', 'span', function (event) {
  event.preventDefault();

  const clicked = $(this);
  if (clicked.hasClass('selected')) {
    // Already showing this entry; nothing to swap.
    return;
  }

  $('#jailbreak-intro-label > span').removeClass('selected');
  clicked.addClass('selected');

  // The span's text doubles as the image file suffix.
  const imageName = clicked.text();
  const introImage = document.getElementById("jailbreak-intro-img");
  introImage.src = "images/metrics/intro-" + imageName + ".png";
});
// Clicking a not-yet-selected link inside #refusal-loss-formula-list hides
// every .formula element, moves the 'selected' class to the clicked link and
// reveals the element addressed by the link's href.
$('#refusal-loss-formula-list').on('click', 'a', function (event) {
  event.preventDefault();

  const link = $(this);
  if (link.hasClass('selected')) {
    // The clicked formula is already the visible one.
    return;
  }

  $('.formula').hide(200);
  $('.formula-list > a').removeClass('selected');
  link.addClass('selected');

  // The href is used directly as a jQuery selector for the element to show.
  const targetSelector = link.attr('href');
  $(targetSelector).show(200);
});
// Image element whose source is swapped whenever the model, the defense or a
// threshold changes.
const diagram = document.getElementById("reliability-diagram");

// Results table read by updateFigurePath(); it stays undefined until the
// request below succeeds.
let jailbreak_results;
$.getJSON("demo_results/jailbreak_defense_results.jsonl", function (data) {
  jailbreak_results = data;
}).fail(function (jqXHR, textStatus, errorThrown) {
  // Without this, a missing or unparsable results file fails silently and
  // only surfaces later as an unrelated-looking TypeError.
  console.error(
    "Failed to load demo_results/jailbreak_defense_results.jsonl:",
    textStatus,
    errorThrown
  );
});

// Switching model only needs the figure (and the score boards) refreshed.
$('input[type=radio][name=models]').change(function () {
  diagram.src = updateFigurePath();
});

// Switching defense enables only the slider that belongs to the chosen
// defense ('ppl' or 'gradient_cuff'); any other defense disables both.
$('input[type=radio][name=defense]').change(function () {
  const usesPplSlider = this.value === 'ppl';
  const usesGradientSlider = this.value === 'gradient_cuff';
  $("#ppl-slider").slider(usesPplSlider ? 'enable' : 'disable');
  $("#gradient-norm-slider").slider(usesGradientSlider ? 'enable' : 'disable');
  diagram.src = updateFigurePath();
});

// Gradient-norm threshold slider; its current value is mirrored into
// #gradient-norm-threshold.
const bin_handle = $("#gradient-norm-threshold");
$("#gradient-norm-slider").slider({
  step: 50,
  min: 0,
  max: 800,
  value: 100,
  create: function () {
    bin_handle.text($(this).slider("value"));
  },
  slide: function (event, ui) {
    bin_handle.text(ui.value);
    // Pass ui.value explicitly instead of letting updateFigurePath() read the
    // slider option itself.
    diagram.src = updateFigurePath(ui.value);
  },
});

// Perplexity threshold slider; its current value is mirrored into
// #ppl-threshold.
const ts_handle = $("#ppl-threshold");
$("#ppl-slider").slider({
  step: 20,
  min: 0,
  max: 200,
  value: 100,
  create: function () {
    ts_handle.text($(this).slider("value"));
  },
  slide: function (event, ui) {
    ts_handle.text(ui.value);
    // The gradient-norm threshold is read from its slider; the perplexity
    // threshold comes from the value being slid to.
    diagram.src = updateFigurePath(
      $('#gradient-norm-slider').slider("option", "value"),
      ui.value
    );
  },
});
/**
 * Looks up the stats object recorded for one attack.
 *
 * @param {Object} model_results - Per-attack results for the selected defense and model.
 * @param {string} attack - Attack key, e.g. "pair".
 * @param {?string} threshold_key - Key of the threshold entry for defenses whose
 *   results are nested per threshold, or null when the stats sit directly
 *   under the attack.
 * @returns {Object} Stats object; the caller reads its "tpr" and "fpr" fields.
 */
function lookupAttackStats(model_results, attack, threshold_key) {
  const entry = model_results[attack];
  return threshold_key === null ? entry : entry[threshold_key];
}

/**
 * Refreshes the #asr-value and #brr-value boards for the currently checked
 * model/defense radios and returns the path of the matching figure.
 *
 * The "ppl" defense is keyed by `ppl_threshold`, "gradient_cuff" by
 * `gradient_norm_threshold`; every other defense has no threshold level.
 * If the results table has not been loaded yet, the boards are left untouched
 * and only the figure path is returned (previously this threw a TypeError).
 *
 * @param {number} [gradient_norm_threshold] - Defaults to the current value of #gradient-norm-slider.
 * @param {number} [ppl_threshold] - Defaults to the current value of #ppl-slider.
 * @returns {string} Relative path of the figure to display.
 */
function updateFigurePath(gradient_norm_threshold=$('#gradient-norm-slider').slider("option", "value"),ppl_threshold=$('#ppl-slider').slider("option", "value")) {
  const cur_model = $("input[type='radio'][name='models']:checked").val();
  const cur_defense = $("input[type='radio'][name='defense']:checked").val();

  // Only the two threshold-based defenses nest their results (and name their
  // figures) by threshold value.
  let threshold_key = null;
  if (cur_defense === "ppl") {
    threshold_key = ppl_threshold.toString();
  } else if (cur_defense === "gradient_cuff") {
    threshold_key = gradient_norm_threshold.toString();
  }

  const figure_path = threshold_key === null
    ? "demo_results/" + cur_defense + "_" + cur_model + ".png"
    : "demo_results/" + cur_defense + "_" + cur_model + "_threshold_" + threshold_key + ".png";

  // The results table is filled in by an asynchronous request; a radio or
  // slider interaction can get here before it has arrived.
  if (jailbreak_results == null) {
    return figure_path;
  }

  const model_results = jailbreak_results[cur_defense][cur_model];
  const tpr = function (attack) {
    return lookupAttackStats(model_results, attack, threshold_key)["tpr"];
  };

  // Five individual attacks plus the mean of the four "lrl_*" entries,
  // averaged as six equally weighted terms.
  const lrl_mean = (tpr("lrl_de") + tpr("lrl_fr") + tpr("lrl_zh-CN") + tpr("lrl_sv")) / 4;
  const asr = (tpr("adv_harmful_behavior") + tpr("autodan") + tpr("pair") + tpr("tap") + tpr("base64") + lrl_mean) / 6;

  const asr_board = document.getElementById("asr-value");
  const brr_board = document.getElementById("brr-value");
  asr_board.innerText = parseFloat(asr).toFixed(3);
  // The "fpr" shown is taken from the "pair" entry.
  // NOTE(review): presumably fpr is identical across attacks — confirm against the data file.
  brr_board.innerText = parseFloat(lookupAttackStats(model_results, "pair", threshold_key)["fpr"]).toFixed(3);

  return figure_path;
}
});
|