yonad2008 committed on
Commit
69ba471
·
verified ·
1 Parent(s): a439f19

Upload GNN turn-level model artifacts

Browse files
Files changed (3) hide show
  1. README.md +12 -12
  2. gnn_homo_payload.pt +2 -2
  3. metadata.json +8 -8
README.md CHANGED
@@ -13,19 +13,19 @@ model-index:
13
  metrics:
14
  - name: F1
15
  type: f1
16
- value: 0.2062
17
  - name: PR-AUC
18
  type: pr_auc
19
- value: 0.2205
20
  - name: ROC-AUC
21
  type: roc_auc
22
- value: 0.8497
23
  - name: Precision
24
  type: precision
25
- value: 0.1437
26
  - name: Recall
27
  type: recall
28
- value: 0.3744
29
  ---
30
  # GNN Jailbreak Prediction Model (phi4:14b)
31
 
@@ -35,12 +35,12 @@ Homogeneous GNN classifier for unsafe/jailbreak likelihood in multi-turn convers
35
 
36
  | Metric | Value |
37
  |----------------|--------|
38
- | F1 | 0.2062 |
39
- | PR-AUC | 0.2205 |
40
- | ROC-AUC | 0.8497 |
41
- | Precision | 0.1437 |
42
- | Recall | 0.3744 |
43
- | Best Threshold | 0.780 |
44
 
45
  ## Training Details
46
 
@@ -55,4 +55,4 @@ Homogeneous GNN classifier for unsafe/jailbreak likelihood in multi-turn convers
55
 
56
  ## Dataset Size (training samples)
57
 
58
- Prepared turn-level samples: 2585
 
13
  metrics:
14
  - name: F1
15
  type: f1
16
+ value: 0.8586
17
  - name: PR-AUC
18
  type: pr_auc
19
+ value: 0.9720
20
  - name: ROC-AUC
21
  type: roc_auc
22
+ value: 0.9772
23
  - name: Precision
24
  type: precision
25
+ value: 0.8589
26
  - name: Recall
27
  type: recall
28
+ value: 0.9158
29
  ---
30
  # GNN Jailbreak Prediction Model (phi4:14b)
31
 
 
35
 
36
  | Metric | Value |
37
  |----------------|--------|
38
+ | F1 | 0.8586 |
39
+ | PR-AUC | 0.9720 |
40
+ | ROC-AUC | 0.9772 |
41
+ | Precision | 0.8589 |
42
+ | Recall | 0.9158 |
43
+ | Best Threshold | 0.390 |
44
 
45
  ## Training Details
46
 
 
55
 
56
  ## Dataset Size (training samples)
57
 
58
+ Prepared turn-level samples: 395
gnn_homo_payload.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10bcdd0597c5bdbaaf39e8d99347ce740a5f789e9b2e3d32408dd1ad69c3987d
3
- size 1045189
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edb1c160195c31c644ebb39468cbeed6ecfb41393cebc20f32c681f6161be870
3
+ size 971461
metadata.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "csv": "/home/digayona/multi_turn_jailbreak_RL/GNN/turns_table_llama3_8b_harmbench.csv",
3
  "target_model": "phi4:14b",
4
- "threshold": 0.78,
5
  "sentence_model_name": "sentence-transformers/all-MiniLM-L6-v2",
6
- "n_rows": 2585,
7
  "n_models": 1,
8
  "split_col": "goal",
9
  "seed": 42,
10
  "turn_norm_mode": "dataset_max",
11
  "turn_norm_denom": 22.0,
12
  "session_len_norm_mode": "dataset_max",
13
- "session_len_norm_denom": 22.0,
14
  "model_kwargs": {
15
  "hidden_channels": 128,
16
  "num_layers": 2,
@@ -20,10 +20,10 @@
20
  },
21
  "use_turn_bucket_features": false,
22
  "test_metrics": {
23
- "roc_auc": 0.8497476977719763,
24
- "pr_auc": 0.22046366741105397,
25
- "f1": 0.2062108262108262,
26
- "precision": 0.1437456148001771,
27
- "recall": 0.3743833943833944
28
  }
29
  }
 
1
  {
2
  "csv": "/home/digayona/multi_turn_jailbreak_RL/GNN/turns_table_llama3_8b_harmbench.csv",
3
  "target_model": "phi4:14b",
4
+ "threshold": 0.39,
5
  "sentence_model_name": "sentence-transformers/all-MiniLM-L6-v2",
6
+ "n_rows": 395,
7
  "n_models": 1,
8
  "split_col": "goal",
9
  "seed": 42,
10
  "turn_norm_mode": "dataset_max",
11
  "turn_norm_denom": 22.0,
12
  "session_len_norm_mode": "dataset_max",
13
+ "session_len_norm_denom": 20.0,
14
  "model_kwargs": {
15
  "hidden_channels": 128,
16
  "num_layers": 2,
 
20
  },
21
  "use_turn_bucket_features": false,
22
  "test_metrics": {
23
+ "roc_auc": 0.9772275091195899,
24
+ "pr_auc": 0.9720258299076259,
25
+ "f1": 0.8585849597195537,
26
+ "precision": 0.8589285714285715,
27
+ "recall": 0.9158333333333333
28
  }
29
  }