johnsonhung906 committed on
Commit
88e1260
1 Parent(s): 6792367

change header styles

Browse files
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
assets/css/custom_style.css DELETED
@@ -1,83 +0,0 @@
1
- @media screen and (min-width: 70em) { .main-content { max-width: 70rem; padding: 2rem 6rem; margin: 0 auto; font-size: 1.1rem; } }
2
- @font-face { font-family: 'flexslider-icon'; src: url("../fonts/flexslider-icon.eot"); src: url("../fonts/flexslider-icon.eot?#iefix") format("embedded-opentype"), url("../fonts/flexslider-icon.woff") format("woff"), url("../fonts/flexslider-icon.ttf") format("truetype"), url("../fonts/flexslider-icon.svg#flexslider-icon") format("svg"); font-weight: normal; font-style: normal; }
3
- header h1, header h2 { font-weight: normal; line-height: normal; }
4
-
5
- header h2 { margin-top: .83em; }
6
-
7
- .main-content p { text-align: justify; }
8
-
9
- .jailbreak-intro-sec { width: 80%; margin: 1em auto; }
10
-
11
- #refusal-loss-formula .formula { text-align: center; }
12
-
13
- #refusal-loss-formula .formula-list { width: fit-content; margin: 0 auto; }
14
-
15
- #refusal-loss-formula .formula-list a { display: inline-block; width: 250px; margin: 0 20px; padding: 8px 10px; text-align: center; background: #DDD; cursor: pointer; text-decoration: none; color: #333; border-radius: 10px; user-select: none; transition-duration: 0.3s; }
16
-
17
- #jailbreak-demo .radio-group { margin-right: 5px; }
18
-
19
- input[type='radio'] { visibility: hidden; display: none; }
20
-
21
- #jailbreak-demo .radio-group .option-label { font-size: 1em; cursor: pointer; position: relative; padding: 0.1em 0.6em; border: 1px solid #999; background: #FFF; border-radius: 0.2em; transition: 0.2s; }
22
-
23
- #jailbreak-demo .radio-group .options:checked ~ .option-label { color: #FFF; background: #777; }
24
-
25
- #refusal-loss-formula .formula-list a:hover, #jailbreak-demo #defense-methods .defense:hover { background: #555; color: #FFF; }
26
-
27
- #jailbreak-demo #defense-methods .options:checked ~ .defense { color: #FFF; background: #555; }
28
-
29
- #jailbreak-demo #defense-methods .defense { display: inline-block; width: 60%; margin: 2% auto 8%; padding: 8px 10px; text-align: center; background: #DDD; cursor: pointer; text-decoration: none; color: #333; border-radius: 10px; user-select: none; transition-duration: 0.3s; }
30
-
31
- #jailbreak-demo .legend { text-align: center; width: 70%; margin: 0 auto; }
32
-
33
- #jailbreak-demo .figure-option { text-align: center; width: 70%; margin: 4% auto 0; /* Customize the label (the container) */ /* Hide the browser's default checkbox */ /* Create a custom checkbox */ /* On mouse-over, add a grey background color */ /* When the checkbox is checked, add a blue background */ /* Create the checkmark/indicator (hidden when not checked) */ /* Show the checkmark when checked */ /* Style the checkmark/indicator */ }
34
- #jailbreak-demo .figure-option .container { display: block; position: relative; padding-left: 35px; margin-bottom: 12px; cursor: pointer; font-size: 22px; -webkit-user-select: none; -moz-user-select: none; -ms-user-select: none; user-select: none; }
35
- #jailbreak-demo .figure-option .container input { position: absolute; opacity: 0; cursor: pointer; height: 0; width: 0; }
36
- #jailbreak-demo .figure-option .checkmark { position: absolute; top: 4px; left: 8px; height: 25px; width: 25px; background-color: #eee; }
37
- #jailbreak-demo .figure-option .container:hover input ~ .checkmark { background-color: #ccc; }
38
- #jailbreak-demo .figure-option .container input:checked ~ .checkmark { background-color: #9b9bff; }
39
- #jailbreak-demo .figure-option .checkmark:after { content: ""; position: absolute; display: none; }
40
- #jailbreak-demo .figure-option .container input:checked ~ .checkmark:after { display: block; }
41
- #jailbreak-demo .figure-option .container .checkmark:after { left: 9px; top: 5px; width: 5px; height: 10px; border: solid white; border-width: 0 3px 3px 0; -webkit-transform: rotate(45deg); -ms-transform: rotate(45deg); transform: rotate(45deg); }
42
-
43
- #jailbreak-demo .figure { margin: 0 auto; display: block; }
44
-
45
- #jailbreak-demo .figure #original { display: none; }
46
-
47
- #jailbreak-demo .figure img { user-drag: none; -webkit-user-drag: none; user-select: none; -khtml-user-drag: none; -moz-user-drag: none; -o-user-drag: none; pointer-events: none; position: relative; left: 35px; }
48
-
49
- #jailbreak-demo .figure-caption { width: 240px; text-align: center; display: block; margin: 0 auto; padding: 10px 0 0; font-size: .8em; }
50
-
51
- #jailbreak-demo .figure-caption ul { padding-left: 0; }
52
-
53
- #jailbreak-demo .figure-caption ul li { list-style: none; }
54
-
55
- #jailbreak-demo .figure-caption .model-prediction { font-weight: bold; }
56
-
57
- #jailbreak-demo .figure-caption .correct { color: #009926; }
58
-
59
- #jailbreak-demo .figure-caption .wrong { color: #e31327; }
60
-
61
- #jailbreak-demo .attack-success-rate { display: inline-block; width: 60%; margin: 2% auto 8%; padding: 8px 10px; text-align: center; text-decoration: none; background: #DDD; color: #333; border-radius: 10px; user-select: none; }
62
- #jailbreak-demo .attack-success-rate .jailbreak-metric { font-size: 0.75em; display: block; }
63
- #jailbreak-demo .attack-success-rate .attack-success-rate-value { font-size: 1.5em; font-family: "sans-serif"; color: #820000; }
64
-
65
- #jailbreak-demo .benign-refusal-rate { display: inline-block; width: 60%; margin: 2% auto 8%; padding: 8px 10px; text-align: center; text-decoration: none; background: #DDD; color: #333; border-radius: 10px; user-select: none; }
66
- #jailbreak-demo .benign-refusal-rate .jailbreak-metric { font-size: 0.75em; display: block; }
67
- #jailbreak-demo .benign-refusal-rate .benign-refusal-rate-value { font-size: 1.5em; font-family: "sans-serif"; color: #820000; }
68
-
69
- .warning-quote { padding: 15px; font-size: 0.8em; background-color: #f43636ba; color: white; margin-bottom: 15px; border-left: 5px solid #ff3030; transition-duration: 0.3s; }
70
-
71
- .closebtn { margin-left: 15px; color: white; font-weight: bold; float: right; font-size: 6px; line-height: 20px; cursor: pointer; transition: 0.3s; }
72
-
73
- /* When moving the mouse over the close button */
74
- .closebtn:hover { color: black; }
75
-
76
- .slider-container { display: block; margin-top: 1em; margin-bottom: 0.5em; float: left; }
77
-
78
- .slider-label { width: 140px; float: left; line-height: 1; }
79
-
80
- .slider-content { width: 450px; position: relative; float: right; }
81
-
82
- #ppl-threshold, #gradient-norm-threshold { width: 3em; height: 1.6em; top: 50%; margin-top: -.8em; text-align: center; line-height: 1.6em; }
83
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
assets/css/style.css CHANGED
@@ -4,7 +4,7 @@
4
  html { font-family: sans-serif; /* 1 */ -ms-text-size-adjust: 100%; /* 2 */ -webkit-text-size-adjust: 100%; /* 2 */ }
5
 
6
  /** Remove default margin. */
7
- body { margin: 0; }
8
 
9
  /* HTML5 display definitions ========================================================================== */
10
  /** Correct `block` display not defined for any HTML5 element in IE 8/9. Correct `block` display not defined for `details` or `summary` in IE 10/11 and Firefox. Correct `block` display not defined for `main` in IE 11. */
@@ -275,9 +275,9 @@ a:hover { text-decoration: underline; }
275
  @media screen and (max-width: 42em) { .page-header { padding: 2rem 1rem; } }
276
 
277
  .project-name { margin-top: 0; margin-bottom: 0.1rem; }
278
- @media screen and (min-width: 64em) { .project-name { font-size: 3.25rem; } }
279
- @media screen and (min-width: 42em) and (max-width: 64em) { .project-name { font-size: 2.25rem; } }
280
- @media screen and (max-width: 42em) { .project-name { font-size: 1.75rem; } }
281
 
282
  .project-tagline { margin-bottom: 2rem; font-weight: normal; opacity: 0.7; }
283
  @media screen and (min-width: 64em) { .project-tagline { font-size: 1.25rem; } }
 
4
  html { font-family: sans-serif; /* 1 */ -ms-text-size-adjust: 100%; /* 2 */ -webkit-text-size-adjust: 100%; /* 2 */ }
5
 
6
  /** Remove default margin. */
7
/* Page baseline: no outer margin, generic sans-serif family. */
body {
  margin: 0;
  font-family: sans-serif;
}
8
 
9
  /* HTML5 display definitions ========================================================================== */
10
  /** Correct `block` display not defined for any HTML5 element in IE 8/9. Correct `block` display not defined for `details` or `summary` in IE 10/11 and Firefox. Correct `block` display not defined for `main` in IE 11. */
 
275
  @media screen and (max-width: 42em) { .page-header { padding: 2rem 1rem; } }
276
 
277
  .project-name { margin-top: 0; margin-bottom: 0.1rem; }
278
/*
 * Title size per viewport width. The rules stay in this order on purpose:
 * adjacent ranges both match at exactly 64em and 42em, and the later rule wins.
 */
@media screen and (min-width: 64em) {
  .project-name {
    font-size: 4.25rem;
  }
}

@media screen and (min-width: 42em) and (max-width: 64em) {
  .project-name {
    font-size: 3.25rem;
  }
}

@media screen and (max-width: 42em) {
  .project-name {
    font-size: 2.75rem;
  }
}
281
 
282
  .project-tagline { margin-bottom: 2rem; font-weight: normal; opacity: 0.7; }
283
  @media screen and (min-width: 64em) { .project-tagline { font-size: 1.25rem; } }
demo_results/normal_1.png CHANGED
demo_results/normal_3.png CHANGED
demo_results/normal_5.png DELETED
Binary file (209 kB)
 
index.html CHANGED
@@ -23,8 +23,8 @@
23
  <link rel="stylesheet" href="assets/css/style.css?v=90447f115a006bc45b738d9592069468b20e2551">
24
  <!-- start custom head snippets, customize with your own _includes/head-custom.html file -->
25
  <link rel="stylesheet" href="assets/css/custom_style.css?v=90447f115a006bc45b738d9592069468b20e2551">
26
-
27
- <!-- <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
28
  <link rel="stylesheet" href="https://ajax.googleapis.com/ajax/libs/jqueryui/1.12.1/themes/smoothness/jquery-ui.css">
29
  <script src="https://ajax.googleapis.com/ajax/libs/jqueryui/1.12.1/jquery-ui.min.js"></script>
30
  <script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/2.9.4/Chart.js"></script>
@@ -32,133 +32,66 @@
32
  <link rel="stylesheet" href="//code.jquery.com/ui/1.13.2/themes/base/jquery-ui.css">
33
  <link rel="stylesheet" href="/resources/demos/style.css">
34
  <script src="https://code.jquery.com/jquery-3.6.0.js"></script>
35
- <script src="https://code.jquery.com/ui/1.13.2/jquery-ui.js"></script> -->
36
 
37
  <!-- for mathjax support -->
38
  <script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6"></script>
39
  <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
40
  <!-- end custom head snippets -->
41
 
42
- <script>
43
- let normalIndex = 0;
44
- let attackIndex = 0;
45
-
46
- function navigateImages(type, direction) {
47
- let images;
48
- let currentIndex;
49
-
50
- // Determine which set of images to target and the current index
51
- if (type === 'normal') {
52
- images = document.querySelectorAll('.normal-gallery .image-gallery img');
53
- currentIndex = normalIndex;
54
- } else if (type === 'attack') {
55
- images = document.querySelectorAll('.attack-gallery .image-gallery img');
56
- currentIndex = attackIndex;
57
- }
58
-
59
- // Check if images were found to avoid accessing undefined elements
60
- if (images && images.length > 0) {
61
- // Hide current image
62
- images[currentIndex].classList.remove('active');
63
-
64
- // Calculate new index (modulo ensures cycling)
65
- currentIndex = (currentIndex + direction + images.length) % images.length;
66
-
67
- // Show new image
68
- images[currentIndex].classList.add('active');
69
-
70
- // Update index tracker
71
- if (type === 'normal') {
72
- normalIndex = currentIndex;
73
- } else if (type === 'attack') {
74
- attackIndex = currentIndex;
75
- }
76
- } else {
77
- console.error("No images found for type:", type);
78
- }
79
- }
80
- </script>
81
-
82
- <style>
83
- .image-gallery-container {
84
- position: relative;
85
- display: flex;
86
- justify-content: center;
87
- align-items: center;
88
- margin: 20px auto;
89
- }
90
 
91
- .image-gallery {
92
- width: 80%; /* Increased width */
93
- max-height: 600px; /* Increased height */
94
- overflow: hidden;
95
- position: relative;
96
- margin-left: 50px;
97
- margin-right: 50px;
98
- }
99
-
100
- .image-gallery img {
101
- width: 100%;
102
- height: auto;
103
- border-radius: 10px;
104
- display: none;
105
- }
106
-
107
- .image-gallery img.active {
108
- display: block;
109
- }
110
-
111
- .arrow {
112
- cursor: pointer;
113
- position: absolute;
114
- top: 50%;
115
- transform: translateY(-50%);
116
- font-size: 2.5em;
117
- color: #444; /* Darker color */
118
- padding: 5px 15px;
119
- transition: color 0.3s;
120
- user-select: none;
121
- transform: scaleX(0.7);
122
- }
123
-
124
- .arrow:hover {
125
- color: #007bff; /* Color on hover */
126
- }
127
-
128
- .left-arrow {
129
- left: 20px; /* Adjusted position */
130
- }
131
 
132
- .right-arrow {
133
- right: 20px; /* Adjusted position */
134
- }
135
-
136
- .group-title {
137
- font-size: large;
138
- font-weight: bold;
139
- margin-top: 30px;
140
- font-size: 1.2em;
141
- text-align: center; /* Center the text */
142
- display: block; /* Ensure it behaves as a block element */
143
- margin-bottom: 10px; /* Add some space below the title */
144
- }
145
-
146
- .green {
147
- color: green; /* Set text color to green */
148
- }
149
-
150
- .red {
151
- color: red; /* Set text color to green */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  }
153
- </style>
154
-
155
  </head>
156
  <body>
157
  <header class="page-header" role="banner">
158
- <h1 class="project-name">Attention Tracker</h1>
159
  <h2 class="project-tagline">Attention Tracker: Detecting Prompt Injection Attacks in LLMs</h2>
160
- <h2 class="project-tagline"><a href="https://arxiv.org/abs/xxx" style="color: white;" target="_blank" rel="noopener noreferrer">https://arxiv.org/abs/xx.xx</a></h2>
161
- <div style="text-align: center; font-size: large;">
162
  <div>
163
  <a href="https://khhung906.github.io/" style="color: white;" target="_blank" rel="noopener noreferrer">
164
  Kuo-Han Hung<sup>1</sup>,
@@ -182,12 +115,57 @@
182
  </a>
183
  </div>
184
 
185
- <div>
186
  <sup>1</sup>National Taiwan University <sup>2</sup>IBM Research
187
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  </header>
 
 
189
  <main id="content" class="main-content" role="main">
190
- <h2 id="abstract">Abstract</h2>
191
 
192
  <p>Large Language Models (LLMs) have revolutionized various domains but remain vulnerable to prompt injection attacks, where malicious inputs manipulate the model into ignoring original instructions and executing designated action. In this paper, we investigate
193
  the underlying mechanisms of these attacks by analyzing the attention patterns within LLMs.
@@ -199,11 +177,11 @@
199
  demonstrate the robustness of our approach through extensive evaluations and provide insights into safeguarding LLM-integrated systems from prompt injection vulnerabilities.
200
  </p>
201
 
202
- <h2 id="what-is-jailbreak">What is Prompt Injection Attack?</h2>
203
  <p>A Prompt Injection Attack is a technique used to manipulate language models (like GPT-3 or similar AI systems) by injecting malicious or deceptive prompts into the input data, causing the model to behave in unexpected or undesired ways. This attack exploits the way language models interpret and respond to instructions, tricking them into providing information or performing actions that were not originally intended.</p>
204
 
205
 
206
- <h2 id="refusal-loss">Distraction Effect</h2>
207
 
208
  <p>
209
  In this section, we analyze the reasons behind the success of prompt injection attacks on LLMs. Specifically, we aim to understand
@@ -223,7 +201,7 @@
223
 
224
  </div>
225
 
226
- <h2 id="proposed-approach-attention-tracker">Proposed Approach: Attention Tracker</h2>
227
  <p> With the discovery of the distraction effect, we propose <strong>Attention Tracker</strong>,
228
  a prompt injection detection method based on tracking the attention pattern on instruction. Our detection procedure is shown below:
229
  </p>
@@ -245,7 +223,7 @@
245
  We provide more details about the running flow of Attention Tracker in the paper.
246
  </p>
247
 
248
- <h2 id="result-attention-tracker">Experiment Result</h2>
249
  <p>
250
  In this section, we evaluate Attention Tracker against various baselines with the AUROC score on two prompt injection detection benchmarks: Open-Prompt-Injection and deepset prompt injection dataset:
251
  </p>
@@ -254,7 +232,7 @@
254
  <p>
255
  As shown in the table, Attention Tracker consistently outperforms existing baselines, with an AUROC improvement of up to 3.1% on the Open-Prompt-Injection benchmark and 10.0% on the deepset prompt injection dataset. Among training-free methods, it achieves even greater gains, with an average AUROC improvement of 31.3% and 20.9% across the two datasets, respectively. Unlike LLM-based methods that rely on larger models for stability, Attention Tracker delivers robust and effective performance even with smaller LLMs, underscoring its suitability for real-world applications.
256
  </p>
257
- <h2 id="demonstration">Demo</h2>
258
 
259
  <p>
260
  We evaluated the effectiveness of the Attention Tracker by visualizing the distribution of attention aggregation for key heads across different data types (normal data vs. attack data) in the Open-Prompt-Injection dataset. Additionally, we calculated the focus score for these data samples. A higher focus score indicates a lower likelihood of prompt injection attacks. The tested model is Qwen-2 1.8b.
@@ -268,7 +246,6 @@
268
  <img id="normalImage2" src="./demo_results/normal_2.png" alt="Normal Image 2">
269
  <img id="normalImage3" src="./demo_results/normal_3.png" alt="Normal Image 3">
270
  <img id="normalImage4" src="./demo_results/normal_4.png" alt="Normal Image 4">
271
- <img id="normalImage4" src="./demo_results/normal_5.png" alt="Normal Image 5">
272
  </div>
273
  <span class="arrow right-arrow" onclick="navigateImages('normal', 1)">&gt;</span>
274
  </div>
@@ -285,12 +262,12 @@
285
  <span class="arrow right-arrow" onclick="navigateImages('attack', 1)">&gt;</span>
286
  </div>
287
 
288
- <h2 id="inquiries"> Inquiries on Attention Tracker</h2>
289
- <p> Please contact <a href="Mailto:khhung906@gmail.com">Kuo-Han Hung</a>
290
  and <a href="Mailto:pin-yu.chen@ibm.com">Pin-Yu Chen</a>
291
- </p>
292
 
293
- <h2 id="citations">Citations</h2>
294
  <p>If you find Attention Tracker helpful and useful for your research, please cite our main paper as follows:</p>
295
 
296
  <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>
 
23
  <link rel="stylesheet" href="assets/css/style.css?v=90447f115a006bc45b738d9592069468b20e2551">
24
  <!-- start custom head snippets, customize with your own _includes/head-custom.html file -->
25
  <link rel="stylesheet" href="assets/css/custom_style.css?v=90447f115a006bc45b738d9592069468b20e2551">
26
+ <link rel="stylesheet" href="style.css">
27
+ <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
28
  <link rel="stylesheet" href="https://ajax.googleapis.com/ajax/libs/jqueryui/1.12.1/themes/smoothness/jquery-ui.css">
29
  <script src="https://ajax.googleapis.com/ajax/libs/jqueryui/1.12.1/jquery-ui.min.js"></script>
30
  <script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/2.9.4/Chart.js"></script>
 
32
  <link rel="stylesheet" href="//code.jquery.com/ui/1.13.2/themes/base/jquery-ui.css">
33
  <link rel="stylesheet" href="/resources/demos/style.css">
34
  <script src="https://code.jquery.com/jquery-3.6.0.js"></script>
35
+ <script src="https://code.jquery.com/ui/1.13.2/jquery-ui.js"></script>
36
 
37
  <!-- for mathjax support -->
38
  <script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6"></script>
39
  <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
40
  <!-- end custom head snippets -->
41
 
42
+ <!-- Font Awesome for PDF and GitHub icons -->
43
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.4/css/all.min.css">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
+ <!-- AI2 HTML-CSS Icons (for arXiv) -->
46
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/academicons/1.9.1/css/academicons.min.css">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
+ <script>
49
// Index of the image currently shown in each gallery.
let normalIndex = 0;
let attackIndex = 0;

/**
 * Show another image in one of the two galleries, wrapping around at either end.
 *
 * The image at the gallery's stored index loses the `active` class, the index is
 * advanced by `direction` (modulo the number of images), and the image at the new
 * index gains `active`. If `type` is not recognised, or the gallery has no images,
 * an error is logged and nothing changes.
 *
 * @param {string} type - 'normal' or 'attack'; selects the gallery and its index.
 * @param {number} direction - Offset added to the current index (the right-arrow handlers pass 1).
 */
function navigateImages(type, direction) {
  const isNormal = type === 'normal';
  const isAttack = type === 'attack';

  // Look up the images only for a known gallery type.
  let galleryImages;
  if (isNormal) {
    galleryImages = document.querySelectorAll('.normal-gallery .image-gallery img');
  } else if (isAttack) {
    galleryImages = document.querySelectorAll('.attack-gallery .image-gallery img');
  }

  // Unknown type or empty gallery: report and leave all state untouched.
  if (!galleryImages || galleryImages.length === 0) {
    console.error("No images found for type:", type);
    return;
  }

  const count = galleryImages.length;
  const shownIndex = isNormal ? normalIndex : attackIndex;

  // Adding `count` before the modulo keeps the result non-negative when stepping back from index 0.
  const nextIndex = (shownIndex + direction + count) % count;

  galleryImages[shownIndex].classList.remove('active');
  galleryImages[nextIndex].classList.add('active');

  // Persist the new position for the gallery that was navigated.
  if (isNormal) {
    normalIndex = nextIndex;
  } else {
    attackIndex = nextIndex;
  }
}
86
+ </script>
87
+
88
  </head>
89
  <body>
90
  <header class="page-header" role="banner">
91
+ <h1 class="project-name" style="font-weight: 500;">Attention Tracker</h1>
92
  <h2 class="project-tagline">Attention Tracker: Detecting Prompt Injection Attacks in LLMs</h2>
93
+ <p/>
94
+ <div style="text-align: center; font-size:larger; ">
95
  <div>
96
  <a href="https://khhung906.github.io/" style="color: white;" target="_blank" rel="noopener noreferrer">
97
  Kuo-Han Hung<sup>1</sup>,
 
115
  </a>
116
  </div>
117
 
118
+ <div style="color: #f1f0f0">
119
  <sup>1</sup>National Taiwan University <sup>2</sup>IBM Research
120
  </div>
121
+
122
<!--
  Publication buttons: paper PDF, arXiv page, code repository, demo.
  Each link opens in a new tab, so it carries rel="noopener noreferrer" like the
  author links in this header. The icons are decorative (the visible label follows
  each one) and are hidden from assistive technology.
  TODO: the href values still contain template placeholders ("<ARXIV PAPER ID>",
  "YOUR REPO HERE") and a generic demo URL; replace them with the real targets.
-->
<div class="publication-links">
  <span class="link-block">
    <a href="https://arxiv.org/pdf/<ARXIV PAPER ID>.pdf" target="_blank" rel="noopener noreferrer"
       class="external-link button is-normal is-rounded is-dark">
      <span class="icon">
        <i class="fas fa-file-pdf" aria-hidden="true"></i>
      </span>
      <span>Paper</span>
    </a>
  </span>

  <span class="link-block">
    <a href="https://arxiv.org/abs/<ARXIV PAPER ID>" target="_blank" rel="noopener noreferrer"
       class="external-link button is-normal is-rounded is-dark">
      <span class="icon">
        <i class="ai ai-arxiv" aria-hidden="true"></i>
      </span>
      <span>arXiv</span>
    </a>
  </span>

  <span class="link-block">
    <a href="https://github.com/YOUR REPO HERE" target="_blank" rel="noopener noreferrer"
       class="external-link button is-normal is-rounded is-dark">
      <span class="icon">
        <i class="fab fa-github" aria-hidden="true"></i>
      </span>
      <span>Code</span>
    </a>
  </span>

  <span class="link-block">
    <a href="https://dataset-link.com" target="_blank" rel="noopener noreferrer"
       class="external-link button is-normal is-rounded is-dark">
      <span class="icon">
        <i class="fas fa-laptop" aria-hidden="true"></i>
      </span>
      <span>Demo</span>
    </a>
  </span>
</div>
163
+
164
  </header>
165
+
166
+
167
  <main id="content" class="main-content" role="main">
168
+ <h2 id="abstract" class="section-title">Abstract</h2>
169
 
170
  <p>Large Language Models (LLMs) have revolutionized various domains but remain vulnerable to prompt injection attacks, where malicious inputs manipulate the model into ignoring original instructions and executing designated action. In this paper, we investigate
171
  the underlying mechanisms of these attacks by analyzing the attention patterns within LLMs.
 
177
  demonstrate the robustness of our approach through extensive evaluations and provide insights into safeguarding LLM-integrated systems from prompt injection vulnerabilities.
178
  </p>
179
 
180
+ <h2 id="what-is-jailbreak" class="section-title">What is Prompt Injection Attack?</h2>
181
  <p>A Prompt Injection Attack is a technique used to manipulate language models (like GPT-3 or similar AI systems) by injecting malicious or deceptive prompts into the input data, causing the model to behave in unexpected or undesired ways. This attack exploits the way language models interpret and respond to instructions, tricking them into providing information or performing actions that were not originally intended.</p>
182
 
183
 
184
+ <h2 id="refusal-loss" class="section-title">Distraction Effect</h2>
185
 
186
  <p>
187
  In this section, we analyze the reasons behind the success of prompt injection attacks on LLMs. Specifically, we aim to understand
 
201
 
202
  </div>
203
 
204
+ <h2 id="proposed-approach-attention-tracker" class="section-title">Proposed Approach: Attention Tracker</h2>
205
  <p> With the discovery of the distraction effect, we propose <strong>Attention Tracker</strong>,
206
  a prompt injection detection method based on tracking the attention pattern on instruction. Our detection procedure is shown below:
207
  </p>
 
223
  We provide more details about the running flow of Attention Tracker in the paper.
224
  </p>
225
 
226
+ <h2 id="result-attention-tracker" class="section-title">Experiment Result</h2>
227
  <p>
228
  In this section, we evaluate Attention Tracker against various baselines with the AUROC score on two prompt injection detection benchmarks: Open-Prompt-Injection and deepset prompt injection dataset:
229
  </p>
 
232
  <p>
233
  As shown in the table, Attention Tracker consistently outperforms existing baselines, with an AUROC improvement of up to 3.1% on the Open-Prompt-Injection benchmark and 10.0% on the deepset prompt injection dataset. Among training-free methods, it achieves even greater gains, with an average AUROC improvement of 31.3% and 20.9% across the two datasets, respectively. Unlike LLM-based methods that rely on larger models for stability, Attention Tracker delivers robust and effective performance even with smaller LLMs, underscoring its suitability for real-world applications.
234
  </p>
235
+ <h2 id="demonstration" class="section-title">Demo</h2>
236
 
237
  <p>
238
  We evaluated the effectiveness of the Attention Tracker by visualizing the distribution of attention aggregation for key heads across different data types (normal data vs. attack data) in the Open-Prompt-Injection dataset. Additionally, we calculated the focus score for these data samples. A higher focus score indicates a lower likelihood of prompt injection attacks. The tested model is Qwen-2 1.8b.
 
246
  <img id="normalImage2" src="./demo_results/normal_2.png" alt="Normal Image 2">
247
  <img id="normalImage3" src="./demo_results/normal_3.png" alt="Normal Image 3">
248
  <img id="normalImage4" src="./demo_results/normal_4.png" alt="Normal Image 4">
 
249
  </div>
250
  <span class="arrow right-arrow" onclick="navigateImages('normal', 1)">&gt;</span>
251
  </div>
 
262
  <span class="arrow right-arrow" onclick="navigateImages('attack', 1)">&gt;</span>
263
  </div>
264
 
265
+ <!-- <h2 id="inquiries" class="section-title"> Inquiries on Attention Tracker</h2>
266
+ <p class="section-title"> Please contact <a href="Mailto:khhung906@gmail.com">Kuo-Han Hung</a>
267
  and <a href="Mailto:pin-yu.chen@ibm.com">Pin-Yu Chen</a>
268
+ </p> -->
269
 
270
+ <h2 id="citations" class="section-title">Citations</h2>
271
  <p>If you find Attention Tracker helpful and useful for your research, please cite our main paper as follows:</p>
272
 
273
  <div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>
style.css CHANGED
@@ -1,86 +1,112 @@
1
- body {
2
- padding: 2rem;
3
- font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
 
 
 
4
  }
5
 
6
- h1 {
7
- font-size: 16px;
8
- margin-top: 0;
 
 
 
 
9
  }
10
 
11
- p {
12
- color: rgb(107, 114, 128);
13
- font-size: 15px;
14
- margin-bottom: 10px;
15
- margin-top: 5px;
16
  }
17
 
18
- .card {
19
- max-width: 620px;
20
- margin: 0 auto;
21
- padding: 16px;
22
- border: 1px solid lightgray;
23
- border-radius: 16px;
24
  }
25
 
26
- .card p:last-child {
27
- margin-bottom: 0;
 
 
 
 
 
 
 
 
 
28
  }
29
 
30
- .image-gallery {
31
- display: flex; /* Use flexbox for side-by-side images */
32
- justify-content: center;
33
- gap: 20px; /* Space between images */
34
- margin-top: 20px; /* Add space above the image gallery */
35
  }
36
 
37
- .image-gallery img {
38
- width: 80%; /* Set width to 80% of the window */
39
- max-width: 500px; /* Ensure maximum width remains 500px */
40
- height: auto; /* Maintain aspect ratio */
41
- border-radius: 10px; /* Rounded corners for images */
42
- box-shadow: 0 4px 10px rgba(0, 0, 0, 0.1); /* Add shadow to images */
43
- display: none; /* Initially hide all images */
44
  }
45
 
46
- .button-container {
47
- text-align: center;
48
- margin-top: 20px;
49
  }
50
 
51
- .button-container .group-title {
 
52
  font-weight: bold;
53
- margin: 20px 0 10px;
54
  font-size: 1.2em;
 
 
 
55
  }
56
 
57
- .normal-button {
58
- background-color: #28a745; /* Green for Normal Data */
59
  }
60
 
61
- .attack-button {
62
- background-color: #dc3545; /* Red for Attack Data */
63
  }
64
 
65
- .button-container button {
66
- padding: 10px 20px;
67
- margin: 5px;
68
- border: none;
69
- color: white;
70
- cursor: pointer;
71
- border-radius: 5px;
72
- font-size: 1em; /* Increased font size */
73
- transition: background-color 0.3s, transform 0.3s; /* Smooth transitions */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  }
75
 
76
- .button-container button:hover {
77
- transform: scale(1.05); /* Slightly increase size on hover */
 
78
  }
79
 
80
- .normal-button:hover {
81
- background-color: #218838; /* Darker green on hover */
 
82
  }
83
 
84
- .attack-button:hover {
85
- background-color: #c82333; /* Darker red on hover */
 
86
  }
 
1
/* Image gallery with overlaid previous/next arrows. */

/* Positioning context for the absolutely positioned arrows; centers its content on both axes. */
.image-gallery-container {
  position: relative;
  display: flex;
  justify-content: center;
  align-items: center;
  margin: 20px auto;
}

/* Viewport for the images; anything taller than max-height is clipped. */
.image-gallery {
  width: 80%;
  max-height: 600px;
  overflow: hidden;
  position: relative;
  margin-left: 50px;
  margin-right: 50px;
}

/* Every image is hidden unless it carries the .active class. */
.image-gallery img {
  width: 100%;
  height: auto;
  border-radius: 10px;
  display: none;
}

.image-gallery img.active {
  display: block;
}

.arrow {
  cursor: pointer;
  position: absolute;
  top: 50%;
  /*
   * Vertical centering and horizontal squeeze go in one declaration.
   * Written as two separate `transform` declarations, the second one replaces
   * the first and the arrow ends up hanging below the vertical midpoint.
   */
  transform: translateY(-50%) scaleX(0.7);
  font-size: 2.5em;
  color: #444;
  padding: 5px 15px;
  transition: color 0.3s;
  user-select: none;
}

.arrow:hover {
  color: #007bff;
}

/* Horizontal placement of each arrow inside the container. */
.left-arrow {
  left: 20px;
}

.right-arrow {
  right: 20px;
}
53
 
54
/* Centered, bold caption shown as a block with spacing above and below. */
.group-title {
  display: block;
  /* A single font-size: the earlier `large` value was always overridden by this one. */
  font-size: 1.2em;
  font-weight: bold;
  text-align: center;
  margin-top: 30px;
  margin-bottom: 10px;
}

/* Text colour helpers. */
.green {
  color: green;
}

.red {
  color: red;
}

/* Centered section headings. */
.section-title {
  text-align: center;
}
75
+
76
+
77
/* Row of link buttons, centered. */
.publication-links {
  text-align: center;
  margin-top: 20px;
}

/* Wrapper around each button; the margin sets the gap between neighbours. */
.link-block {
  display: inline-block;
  margin: 8px;
}

/* Pill-shaped button: black text on a light grey background. */
.button {
  display: inline-flex;
  align-items: center;
  padding: 10px 18px;
  text-decoration: none;
  color: black;
  background-color: rgb(236, 236, 236);
  border-radius: 25px;
  font-size: 18px;
  transition: background-color 0.3s ease;
}

/*
 * Inverted colours (dark grey background, white text) on hover.
 * :focus gets the same treatment so keyboard users see the same feedback.
 */
.button:hover,
.button:focus {
  background-color: #495057;
  color: white;
}

/* Icon wrapper: gap between the icon and the label. */
.icon {
  margin-right: 8px;
  font-size: 20px;
}

/* Same size applied directly to the icon-font elements. */
.fas, .fab, .ai {
  font-size: 20px;
}