adaptive
Browse files- index.html +3 -43
index.html
CHANGED
@@ -473,7 +473,7 @@
|
|
473 |
|
474 |
|
475 |
<div class="columns is-centered">
|
476 |
-
|
477 |
<div class="content">
|
478 |
<h2 class="title is-4">Performance against Adaptive Attacks</h2>
|
479 |
</div>
|
@@ -482,48 +482,8 @@
|
|
482 |
<h2 class="title is-4">Contribution of Representation Similarity & Label Con-
|
483 |
sistency against Adaptive Attacks</h2>
|
484 |
</div>
|
485 |
-
</div>
|
486 |
-
|
487 |
-
<div class="row align-items-center">
|
488 |
-
<div class="row" style="margin: 10px 0 0">
|
489 |
-
<div class="models-list">
|
490 |
-
<span style="margin-right: 1em;">Models</span>
|
491 |
-
<span class="radio-group"><input type="radio" id="LLaMA2" class="options" name="models" value="llama2_7b_chat" checked="" /><label for="LLaMA2" class="option-label">LLaMA-2-7B-Chat</label></span>
|
492 |
-
<span class="radio-group"><input type="radio" id="Vicuna" class="options" name="models" value="vicuna_7b_v1.5" /><label for="Vicuna" class="option-label">Vicuna-7B-V1.5</label></span>
|
493 |
-
</div>
|
494 |
-
</div>
|
495 |
-
</div>
|
496 |
-
<div class="row align-items-center">
|
497 |
-
<div class="col-4">
|
498 |
-
<div id="defense-methods">
|
499 |
-
<div class="row align-items-center"><input type="radio" id="defense_ppl" class="options" name="defense" value="ppl" /><label for="defense_ppl" class="defense">CIFAR-10</label></div>
|
500 |
-
<div class="row align-items-center"><input type="radio" id="defense_smoothllm" class="options" name="defense" value="smoothllm" /><label for="defense_smoothllm" class="defense">CIFAR-100</label></div>
|
501 |
-
<div class="row align-items-center"><input type="radio" id="defense_erase_check" class="options" name="defense" value="erase_check" /><label for="defense_erase_check" class="defense">ImageNet</label></div>
|
502 |
-
</div>
|
503 |
-
<div class="row align-items-center">
|
504 |
-
<div class="attack-success-rate"><span class="jailbreak-metric">Average Malicious Refusal Rate</span><span class="attack-success-rate-value" id="asr-value">0.959</span></div>
|
505 |
-
</div>
|
506 |
-
<div class="row align-items-center">
|
507 |
-
<div class="benign-refusal-rate"><span class="jailbreak-metric">Benign Refusal Rate</span><span class="benign-refusal-rate-value" id="brr-value">0.050</span></div>
|
508 |
-
</div>
|
509 |
-
</div>
|
510 |
-
<div class="col-8">
|
511 |
-
<figure class="figure">
|
512 |
-
<img id="reliability-diagram" src="demo_results/gradient_cuff_llama2_7b_chat_threshold_100.png" alt="CIFAR-100 Calibrated Reliability Diagram (Full)" />
|
513 |
-
<div class="slider-container">
|
514 |
-
<div class="slider-label"><span>Perplexity Threshold</span></div>
|
515 |
-
<div class="slider-content" id="ppl-slider"><div id="ppl-threshold" class="ui-slider-handle"></div></div>
|
516 |
-
</div>
|
517 |
-
<div class="slider-container">
|
518 |
-
<div class="slider-label"><span>Gradient Threshold</span></div>
|
519 |
-
<div class="slider-content" id="gradient-norm-slider"><div id="gradient-norm-threshold" class="slider-value ui-slider-handle"></div></div>
|
520 |
-
</div>
|
521 |
-
<figcaption class="figure-caption">
|
522 |
-
</figcaption>
|
523 |
-
</figure>
|
524 |
-
</div>
|
525 |
-
</div>
|
526 |
-
</div>
|
527 |
</div>
|
528 |
</div>
|
529 |
|
|
|
473 |
|
474 |
|
475 |
<div class="columns is-centered">
|
476 |
+
<div class="column">
|
477 |
<div class="content">
|
478 |
<h2 class="title is-4">Performance against Adaptive Attacks</h2>
|
479 |
</div>
|
|
|
482 |
<h2 class="title is-4">Contribution of Representation Similarity & Label Con-
|
483 |
sistency against Adaptive Attacks</h2>
|
484 |
</div>
|
485 |
+
</div>
|
486 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
487 |
</div>
|
488 |
</div>
|
489 |
|