Spaces:

TrustSafeAI
/

Attention-Tracker

Running

App Files Community

johnsonhung906 committed on Oct 28, 2024

Commit

a8cdd89

1 Parent(s): 88e1260

add intro image & demo image changes

Browse files

Files changed (7) hide show

.DS_Store +0 -0
assets/.DS_Store +0 -0
demo_results/attack_5.png +0 -0
demo_results/normal_5.png +0 -0
figures/attack_intro.png +0 -0
index.html +62 -48
style.css +8 -0

.DS_Store DELETED Viewed

Binary file (6.15 kB)

assets/.DS_Store CHANGED Viewed

Binary files a/assets/.DS_Store and b/assets/.DS_Store differ

demo_results/attack_5.png ADDED Viewed

demo_results/normal_5.png ADDED Viewed

figures/attack_intro.png ADDED Viewed

index.html CHANGED Viewed

@@ -48,41 +48,53 @@
     <script>
       let normalIndex = 0;
       let attackIndex = 0;
       function navigateImages(type, direction) {
-          let images;
-          let currentIndex;
-          // Determine which set of images to target and the current index
           if (type === 'normal') {
-              images = document.querySelectorAll('.normal-gallery .image-gallery img');
-              currentIndex = normalIndex;
           } else if (type === 'attack') {
-              images = document.querySelectorAll('.attack-gallery .image-gallery img');
-              currentIndex = attackIndex;
-          }
-          // Check if images were found to avoid accessing undefined elements
-          if (images && images.length > 0) {
-              // Hide current image
-              images[currentIndex].classList.remove('active');
-              // Calculate new index (modulo ensures cycling)
-              currentIndex = (currentIndex + direction + images.length) % images.length;
-              // Show new image
-              images[currentIndex].classList.add('active');
-              // Update index tracker
-              if (type === 'normal') {
-                  normalIndex = currentIndex;
-              } else if (type === 'attack') {
-                  attackIndex = currentIndex;
-              }
-          } else {
-              console.error("No images found for type:", type);
           }
       }
   </script>
   </head>
@@ -151,14 +163,13 @@
           </span>
           <span class="link-block">
-              <a href="https://dataset-link.com" target="_blank"
-                  class="external-link button is-normal is-rounded is-dark">
-                  <span class="icon">
-                      <i class="fas fa-laptop"></i>
-                  </span>
-                  <span>Demo</span>
-              </a>
-          </span>
       </div>
     </header>
@@ -180,6 +191,7 @@
       <h2 id="what-is-jailbreak" class="section-title">What is Prompt Injection Attack?</h2>
       <p>A Prompt Injection Attack is a technique used to manipulate language models (like GPT-3 or similar AI systems) by injecting malicious or deceptive prompts into the input data, causing the model to behave in unexpected or undesired ways. This attack exploits the way language models interpret and respond to instructions, tricking them into providing information or performing actions that were not originally intended.</p>
       <h2 id="refusal-loss" class="section-title">Distraction Effect</h2>
@@ -232,7 +244,7 @@
       <p>
         As shown in the table, Attention Tracker consistently outperforms existing baselines, with an AUROC improvement of up to 3.1% on the Open-Prompt-Injection benchmark and 10.0% on the deepset prompt injection dataset. Among training-free methods, it achieves even greater gains, with an average AUROC improvement of 31.3% and 20.9% across the two datasets, respectively. Unlike LLM-based methods that rely on larger models for stability, Attention Tracker delivers robust and effective performance even with smaller LLMs, underscoring its suitability for real-world applications.
       </p>
-      <h2 id="demonstration" class="section-title">Demo</h2>
       <p>
         We evaluated the effectiveness of the Attention Tracker by visualizing the distribution of attention aggregation for key heads across different data types (normal data vs. attack data) in the Open-Prompt-Injection dataset. Additionally, we calculated the focus score for these data samples. A higher focus score indicates a lower likelihood of prompt injection attacks. The tested model is Qwen-2 1.8b.
@@ -240,24 +252,26 @@
       <div class="group-title green">Normal Data</div>
       <div class="image-gallery-container normal-gallery">
-          <span class="arrow left-arrow" onclick="navigateImages('normal', -1)">&lt;</span>
-          <div class="image-gallery">
-              <img id="normalImage1" src="./demo_results/normal_1.png" class="active" alt="Normal Image 1">
-              <img id="normalImage2" src="./demo_results/normal_2.png" alt="Normal Image 2">
-              <img id="normalImage3" src="./demo_results/normal_3.png" alt="Normal Image 3">
-              <img id="normalImage4" src="./demo_results/normal_4.png" alt="Normal Image 4">
-          </div>
-          <span class="arrow right-arrow" onclick="navigateImages('normal', 1)">&gt;</span>
       </div>
       <div class="group-title red">Attack Data</div>
       <div class="image-gallery-container attack-gallery">
           <span class="arrow left-arrow" onclick="navigateImages('attack', -1)">&lt;</span>
           <div class="image-gallery">
-              <img id="attackImage1" src="./demo_results/attack_1.png" class="active" alt="Attack Image 1">
               <img id="attackImage2" src="./demo_results/attack_2.png" alt="Attack Image 2">
               <img id="attackImage3" src="./demo_results/attack_3.png" alt="Attack Image 3">
               <img id="attackImage4" src="./demo_results/attack_4.png" alt="Attack Image 4">
           </div>
           <span class="arrow right-arrow" onclick="navigateImages('attack', 1)">&gt;</span>
       </div>

     <script>
       let normalIndex = 0;
       let attackIndex = 0;
       function navigateImages(type, direction) {
+        let images;
+        let currentIndex;
+        if (type === 'normal') {
+          images = document.querySelectorAll('.normal-gallery .image-gallery img');
+          currentIndex = normalIndex;
+        } else if (type === 'attack') {
+          images = document.querySelectorAll('.attack-gallery .image-gallery img');
+          currentIndex = attackIndex;
+        }
+        if (images && images.length > 0) {
+          // Remove the active class from the current image
+          images[currentIndex].classList.remove('active');
+          // Update the current index based on direction and number of images
+          currentIndex = (currentIndex + direction + images.length) % images.length;
+          // Add the active class to the new image
+          images[currentIndex].classList.add('active');
+          // Save the updated index
           if (type === 'normal') {
+            normalIndex = currentIndex;
           } else if (type === 'attack') {
+            attackIndex = currentIndex;
           }
+        } else {
+          console.error("No images found for type:", type);
+        }
       }
+      // Initialize the galleries by adding the active class to the first image
+      document.addEventListener("DOMContentLoaded", () => {
+        const normalImages = document.querySelectorAll('.normal-gallery .image-gallery img');
+        const attackImages = document.querySelectorAll('.attack-gallery .image-gallery img');
+        if (normalImages.length > 0) {
+          normalImages[0].classList.add('active');
+        }
+        if (attackImages.length > 0) {
+          attackImages[0].classList.add('active');
+        }
+      });
   </script>
   </head>
           </span>
           <span class="link-block">
+            <a href="#demo" class="external-link button is-normal is-rounded is-dark">
+                <span class="icon">
+                    <i class="fas fa-laptop"></i>
+                </span>
+                <span>Demo</span>
+            </a>
+        </span>
       </div>
     </header>
       <h2 id="what-is-jailbreak" class="section-title">What is Prompt Injection Attack?</h2>
       <p>A Prompt Injection Attack is a technique used to manipulate language models (like GPT-3 or similar AI systems) by injecting malicious or deceptive prompts into the input data, causing the model to behave in unexpected or undesired ways. This attack exploits the way language models interpret and respond to instructions, tricking them into providing information or performing actions that were not originally intended.</p>
+      <div><img id="attack-intro" src="./figures/attack_intro.png" /></div>
       <h2 id="refusal-loss" class="section-title">Distraction Effect</h2>
       <p>
         As shown in the table, Attention Tracker consistently outperforms existing baselines, with an AUROC improvement of up to 3.1% on the Open-Prompt-Injection benchmark and 10.0% on the deepset prompt injection dataset. Among training-free methods, it achieves even greater gains, with an average AUROC improvement of 31.3% and 20.9% across the two datasets, respectively. Unlike LLM-based methods that rely on larger models for stability, Attention Tracker delivers robust and effective performance even with smaller LLMs, underscoring its suitability for real-world applications.
       </p>
+      <h2 id="demo" class="section-title">Demo</h2>
       <p>
         We evaluated the effectiveness of the Attention Tracker by visualizing the distribution of attention aggregation for key heads across different data types (normal data vs. attack data) in the Open-Prompt-Injection dataset. Additionally, we calculated the focus score for these data samples. A higher focus score indicates a lower likelihood of prompt injection attacks. The tested model is Qwen-2 1.8b.
       <div class="group-title green">Normal Data</div>
       <div class="image-gallery-container normal-gallery">
+        <span class="arrow left-arrow" onclick="navigateImages('normal', -1)">&lt;</span>
+        <div class="image-gallery">
+          <!-- <img id="normalImage1" src="./demo_results/normal_1.png" alt="Normal Image 1"> -->
+          <img id="normalImage2" src="./demo_results/normal_2.png" alt="Normal Image 2">
+          <img id="normalImage3" src="./demo_results/normal_3.png" alt="Normal Image 3">
+          <img id="normalImage4" src="./demo_results/normal_4.png" alt="Normal Image 4">
+          <img id="normalImage5" src="./demo_results/normal_5.png" alt="Normal Image 5">
+        </div>
+        <span class="arrow right-arrow" onclick="navigateImages('normal', 1)">&gt;</span>
       </div>
       <div class="group-title red">Attack Data</div>
       <div class="image-gallery-container attack-gallery">
           <span class="arrow left-arrow" onclick="navigateImages('attack', -1)">&lt;</span>
           <div class="image-gallery">
+              <!-- <img id="attackImage1" src="./demo_results/attack_1.png" alt="Attack Image 1" class="active"> -->
               <img id="attackImage2" src="./demo_results/attack_2.png" alt="Attack Image 2">
               <img id="attackImage3" src="./demo_results/attack_3.png" alt="Attack Image 3">
               <img id="attackImage4" src="./demo_results/attack_4.png" alt="Attack Image 4">
+              <img id="attackImage5" src="./demo_results/attack_5.png" alt="Attack Image 5">
           </div>
           <span class="arrow right-arrow" onclick="navigateImages('attack', 1)">&gt;</span>
       </div>

style.css CHANGED Viewed

@@ -109,4 +109,12 @@
 /* Adjust font size for icons */
 .fas, .fab, .ai {
     font-size: 20px; /* Ensuring all icons match the new size */
 }

 /* Adjust font size for icons */
 .fas, .fab, .ai {
     font-size: 20px; /* Ensuring all icons match the new size */
+}
+.image-gallery img {
+	display: none; /* Hide all images by default */
+}
+.image-gallery img.active {
+	display: block; /* Show only the image with the active class */
 }