johnsonhung906 committed on
Commit
a8cdd89
1 Parent(s): 88e1260

add intro image & demo image changes

Browse files
.DS_Store DELETED
Binary file (6.15 kB)
 
assets/.DS_Store CHANGED
Binary files a/assets/.DS_Store and b/assets/.DS_Store differ
 
demo_results/attack_5.png ADDED
demo_results/normal_5.png ADDED
figures/attack_intro.png ADDED
index.html CHANGED
@@ -48,41 +48,53 @@
48
  <script>
49
  let normalIndex = 0;
50
  let attackIndex = 0;
51
-
52
  function navigateImages(type, direction) {
53
- let images;
54
- let currentIndex;
55
-
56
- // Determine which set of images to target and the current index
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  if (type === 'normal') {
58
- images = document.querySelectorAll('.normal-gallery .image-gallery img');
59
- currentIndex = normalIndex;
60
  } else if (type === 'attack') {
61
- images = document.querySelectorAll('.attack-gallery .image-gallery img');
62
- currentIndex = attackIndex;
63
- }
64
-
65
- // Check if images were found to avoid accessing undefined elements
66
- if (images && images.length > 0) {
67
- // Hide current image
68
- images[currentIndex].classList.remove('active');
69
-
70
- // Calculate new index (modulo ensures cycling)
71
- currentIndex = (currentIndex + direction + images.length) % images.length;
72
-
73
- // Show new image
74
- images[currentIndex].classList.add('active');
75
-
76
- // Update index tracker
77
- if (type === 'normal') {
78
- normalIndex = currentIndex;
79
- } else if (type === 'attack') {
80
- attackIndex = currentIndex;
81
- }
82
- } else {
83
- console.error("No images found for type:", type);
84
  }
 
 
 
85
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  </script>
87
 
88
  </head>
@@ -151,14 +163,13 @@
151
  </span>
152
 
153
  <span class="link-block">
154
- <a href="https://dataset-link.com" target="_blank"
155
- class="external-link button is-normal is-rounded is-dark">
156
- <span class="icon">
157
- <i class="fas fa-laptop"></i>
158
- </span>
159
- <span>Demo</span>
160
- </a>
161
- </span>
162
  </div>
163
 
164
  </header>
@@ -180,6 +191,7 @@
180
  <h2 id="what-is-jailbreak" class="section-title">What is Prompt Injection Attack?</h2>
181
  <p>A Prompt Injection Attack is a technique used to manipulate language models (like GPT-3 or similar AI systems) by injecting malicious or deceptive prompts into the input data, causing the model to behave in unexpected or undesired ways. This attack exploits the way language models interpret and respond to instructions, tricking them into providing information or performing actions that were not originally intended.</p>
182
 
 
183
 
184
  <h2 id="refusal-loss" class="section-title">Distraction Effect</h2>
185
 
@@ -232,7 +244,7 @@
232
  <p>
233
  As shown in the table, Attention Tracker consistently outperforms existing baselines, with an AUROC improvement of up to 3.1% on the Open-Prompt-Injection benchmark and 10.0% on the deepset prompt injection dataset. Among training-free methods, it achieves even greater gains, with an average AUROC improvement of 31.3% and 20.9% across the two datasets, respectively. Unlike LLM-based methods that rely on larger models for stability, Attention Tracker delivers robust and effective performance even with smaller LLMs, underscoring its suitability for real-world applications.
234
  </p>
235
- <h2 id="demonstration" class="section-title">Demo</h2>
236
 
237
  <p>
238
  We evaluated the effectiveness of the Attention Tracker by visualizing the distribution of attention aggregation for key heads across different data types (normal data vs. attack data) in the Open-Prompt-Injection dataset. Additionally, we calculated the focus score for these data samples. A higher focus score indicates a lower likelihood of prompt injection attacks. The tested model is Qwen-2 1.8b.
@@ -240,24 +252,26 @@
240
  <div class="group-title green">Normal Data</div>
241
 
242
  <div class="image-gallery-container normal-gallery">
243
- <span class="arrow left-arrow" onclick="navigateImages('normal', -1)">&lt;</span>
244
- <div class="image-gallery">
245
- <img id="normalImage1" src="./demo_results/normal_1.png" class="active" alt="Normal Image 1">
246
- <img id="normalImage2" src="./demo_results/normal_2.png" alt="Normal Image 2">
247
- <img id="normalImage3" src="./demo_results/normal_3.png" alt="Normal Image 3">
248
- <img id="normalImage4" src="./demo_results/normal_4.png" alt="Normal Image 4">
249
- </div>
250
- <span class="arrow right-arrow" onclick="navigateImages('normal', 1)">&gt;</span>
 
251
  </div>
252
 
253
  <div class="group-title red">Attack Data</div>
254
  <div class="image-gallery-container attack-gallery">
255
  <span class="arrow left-arrow" onclick="navigateImages('attack', -1)">&lt;</span>
256
  <div class="image-gallery">
257
- <img id="attackImage1" src="./demo_results/attack_1.png" class="active" alt="Attack Image 1">
258
  <img id="attackImage2" src="./demo_results/attack_2.png" alt="Attack Image 2">
259
  <img id="attackImage3" src="./demo_results/attack_3.png" alt="Attack Image 3">
260
  <img id="attackImage4" src="./demo_results/attack_4.png" alt="Attack Image 4">
 
261
  </div>
262
  <span class="arrow right-arrow" onclick="navigateImages('attack', 1)">&gt;</span>
263
  </div>
 
48
  <script>
49
  let normalIndex = 0;
50
  let attackIndex = 0;
51
+
52
  function navigateImages(type, direction) {
53
+ let images;
54
+ let currentIndex;
55
+
56
+ if (type === 'normal') {
57
+ images = document.querySelectorAll('.normal-gallery .image-gallery img');
58
+ currentIndex = normalIndex;
59
+ } else if (type === 'attack') {
60
+ images = document.querySelectorAll('.attack-gallery .image-gallery img');
61
+ currentIndex = attackIndex;
62
+ }
63
+
64
+ if (images && images.length > 0) {
65
+ // Remove the active class from the current image
66
+ images[currentIndex].classList.remove('active');
67
+
68
+ // Update the current index based on direction and number of images
69
+ currentIndex = (currentIndex + direction + images.length) % images.length;
70
+
71
+ // Add the active class to the new image
72
+ images[currentIndex].classList.add('active');
73
+
74
+ // Save the updated index
75
  if (type === 'normal') {
76
+ normalIndex = currentIndex;
 
77
  } else if (type === 'attack') {
78
+ attackIndex = currentIndex;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  }
80
+ } else {
81
+ console.error("No images found for type:", type);
82
+ }
83
  }
84
+
85
// Once the DOM has been parsed, reveal the first image of each gallery by
// giving it the `active` class.
document.addEventListener("DOMContentLoaded", () => {
  const gallerySelectors = [
    '.normal-gallery .image-gallery img',
    '.attack-gallery .image-gallery img',
  ];

  for (const selector of gallerySelectors) {
    // querySelector returns the first match in document order, or null
    // when the gallery has no images; in that case there is nothing to do.
    const firstImage = document.querySelector(selector);
    if (firstImage !== null) {
      firstImage.classList.add('active');
    }
  }
});
98
  </script>
99
 
100
  </head>
 
163
  </span>
164
 
165
  <span class="link-block">
166
+ <a href="#demo" class="external-link button is-normal is-rounded is-dark">
167
+ <span class="icon">
168
+ <i class="fas fa-laptop"></i>
169
+ </span>
170
+ <span>Demo</span>
171
+ </a>
172
+ </span>
 
173
  </div>
174
 
175
  </header>
 
191
  <h2 id="what-is-jailbreak" class="section-title">What is Prompt Injection Attack?</h2>
192
  <p>A Prompt Injection Attack is a technique used to manipulate language models (like GPT-3 or similar AI systems) by injecting malicious or deceptive prompts into the input data, causing the model to behave in unexpected or undesired ways. This attack exploits the way language models interpret and respond to instructions, tricking them into providing information or performing actions that were not originally intended.</p>
193
 
194
+ <div><img id="attack-intro" src="./figures/attack_intro.png" /></div>
195
 
196
  <h2 id="refusal-loss" class="section-title">Distraction Effect</h2>
197
 
 
244
  <p>
245
  As shown in the table, Attention Tracker consistently outperforms existing baselines, with an AUROC improvement of up to 3.1% on the Open-Prompt-Injection benchmark and 10.0% on the deepset prompt injection dataset. Among training-free methods, it achieves even greater gains, with an average AUROC improvement of 31.3% and 20.9% across the two datasets, respectively. Unlike LLM-based methods that rely on larger models for stability, Attention Tracker delivers robust and effective performance even with smaller LLMs, underscoring its suitability for real-world applications.
246
  </p>
247
+ <h2 id="demo" class="section-title">Demo</h2>
248
 
249
  <p>
250
  We evaluated the effectiveness of the Attention Tracker by visualizing the distribution of attention aggregation for key heads across different data types (normal data vs. attack data) in the Open-Prompt-Injection dataset. Additionally, we calculated the focus score for these data samples. A higher focus score indicates a lower likelihood of prompt injection attacks. The tested model is Qwen-2 1.8b.
 
252
  <div class="group-title green">Normal Data</div>
253
 
254
  <div class="image-gallery-container normal-gallery">
255
+ <span class="arrow left-arrow" onclick="navigateImages('normal', -1)">&lt;</span>
256
+ <div class="image-gallery">
257
+ <!-- <img id="normalImage1" src="./demo_results/normal_1.png" alt="Normal Image 1"> -->
258
+ <img id="normalImage2" src="./demo_results/normal_2.png" alt="Normal Image 2">
259
+ <img id="normalImage3" src="./demo_results/normal_3.png" alt="Normal Image 3">
260
+ <img id="normalImage4" src="./demo_results/normal_4.png" alt="Normal Image 4">
261
+ <img id="normalImage5" src="./demo_results/normal_5.png" alt="Normal Image 5">
262
+ </div>
263
+ <span class="arrow right-arrow" onclick="navigateImages('normal', 1)">&gt;</span>
264
  </div>
265
 
266
  <div class="group-title red">Attack Data</div>
267
  <div class="image-gallery-container attack-gallery">
268
  <span class="arrow left-arrow" onclick="navigateImages('attack', -1)">&lt;</span>
269
  <div class="image-gallery">
270
+ <!-- <img id="attackImage1" src="./demo_results/attack_1.png" alt="Attack Image 1" class="active"> -->
271
  <img id="attackImage2" src="./demo_results/attack_2.png" alt="Attack Image 2">
272
  <img id="attackImage3" src="./demo_results/attack_3.png" alt="Attack Image 3">
273
  <img id="attackImage4" src="./demo_results/attack_4.png" alt="Attack Image 4">
274
+ <img id="attackImage5" src="./demo_results/attack_5.png" alt="Attack Image 5">
275
  </div>
276
  <span class="arrow right-arrow" onclick="navigateImages('attack', 1)">&gt;</span>
277
  </div>
style.css CHANGED
@@ -109,4 +109,12 @@
109
  /* Adjust font size for icons */
110
  .fas, .fab, .ai {
111
  font-size: 20px; /* Ensuring all icons match the new size */
 
 
 
 
 
 
 
 
112
  }
 
109
  /* Adjust font size for icons */
110
  .fas, .fab, .ai {
111
  font-size: 20px; /* Ensuring all icons match the new size */
112
+ }
113
+
114
+ .image-gallery img {
115
+ display: none; /* Hide all images by default */
116
+ }
117
+
118
+ .image-gallery img.active {
119
+ display: block; /* Show only the image with the active class */
120
  }