allenhzy committed on
Commit
34ab225
1 Parent(s): 67260d7
Files changed (1) hide show
  1. index.html +4 -16
index.html CHANGED
@@ -426,29 +426,17 @@
426
  <span id="label-loss" class="formula" style="">
427
  $$
428
  \displaystyle
429
- \begin{aligned}
430
- \phi_\theta(x)&=1-\mathbb{E}_{y \sim T_\theta(x)} JB(y)\\
431
- JB (y) &= \begin{cases}
432
- 1 \text{, if $y$ contains any jailbreak keyword;} \\
433
- 0 \text{, otherwise.}
434
- \end{cases}
435
- \end{aligned}
436
  $$
437
  </span>
438
  <span id="representation-loss" class="formula" style="display: none;">
439
  $$
440
- \displaystyle
441
- \begin{aligned}
442
- f_\theta(x) &=1-\frac{1}{N}\sum_{i=1}^N JB(y_i)\\
443
- JB (y_i) &= \begin{cases}
444
- 1 \text{, if $y_i$ contains any jailbreak keyword;} \\
445
- 0 \text{, otherwise.}
446
- \end{cases}
447
- \end{aligned}
448
  $$
449
  </span>
450
  <span id="total-loss" class="formula" style="display: none;">
451
- $$\displaystyle g_\theta(x)=\sum_{i=1}^P \frac{f_\theta(x\oplus \mu u_i)-f_\theta(x)}{\mu} u_i $$
452
  </span>
453
  </div>
454
  </div>
 
426
  <span id="label-loss" class="formula" style="">
427
  $$
428
  \displaystyle
429
+ Loss_{l} = \frac{1}{k} \sum_{i=1}^{k} \mathcal{L}\left(\mathbb{C}\left(W^i(x+\delta) \right), y_t\right)
 
 
 
 
 
 
430
  $$
431
  </span>
432
  <span id="representation-loss" class="formula" style="display: none;">
433
  $$
434
+ \displaystyle
435
+ Loss_{r} = \frac{1}{k} \sum_{i=1}^{k}\mathcal{S}(\mathbb{R}(W^i(x+\delta)), \mathbb{R}(x+\delta))
 
 
 
 
 
 
436
  $$
437
  </span>
438
  <span id="total-loss" class="formula" style="display: none;">
439
+ $$\displaystyle \mathcal{L}_C(x+\delta, y_t) + Loss_{l} - \alpha \cdot Loss_{r}$$
440
  </span>
441
  </div>
442
  </div>