MAS-AI-0000 committed on
Commit 3cdaafb · verified · 1 Parent(s): dd1f86a

Upload 6 files

detree/utils/detectors/Fast_DetectGPT_evaluation.py ADDED
@@ -0,0 +1,137 @@
+ import argparse
+ import json
+ import logging
+ import random
+ import time
+ from pathlib import Path
+
+ import numpy as np
+ import torch
+ from tqdm import tqdm
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+
+ from ..utils import evaluate_metrics
+
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+
+ _LOG_PATH = Path(__file__).resolve().parents[3] / "runs" / "val-other_detector.txt"
+ _LOG_PATH.parent.mkdir(parents=True, exist_ok=True)
+
+ def get_sampling_discrepancy_analytic(logits_ref, logits_score, labels):
+     assert logits_ref.shape[0] == 1
+     assert logits_score.shape[0] == 1
+     assert labels.shape[0] == 1
+     if logits_ref.size(-1) != logits_score.size(-1):
+         # Vocabulary sizes differ; truncate both to the shared prefix.
+         vocab_size = min(logits_ref.size(-1), logits_score.size(-1))
+         logits_ref = logits_ref[:, :, :vocab_size]
+         logits_score = logits_score[:, :, :vocab_size]
+
+     labels = labels.unsqueeze(-1) if labels.ndim == logits_score.ndim - 1 else labels
+     lprobs_score = torch.log_softmax(logits_score, dim=-1)
+     probs_ref = torch.softmax(logits_ref, dim=-1)
+     log_likelihood = lprobs_score.gather(dim=-1, index=labels).squeeze(-1)
+     mean_ref = (probs_ref * lprobs_score).sum(dim=-1)
+     var_ref = (probs_ref * torch.square(lprobs_score)).sum(dim=-1) - torch.square(mean_ref)
+     discrepancy = (log_likelihood.sum(dim=-1) - mean_ref.sum(dim=-1)) / var_ref.sum(dim=-1).sqrt()
+     discrepancy = discrepancy.mean()
+     return discrepancy.item()
+
+ def get_text_crit(text, args, model_config):
+     tokenized = model_config["scoring_tokenizer"](text, return_tensors="pt",
+                                                   return_token_type_ids=False)
+     labels = tokenized.input_ids[:, 1:]
+     with torch.no_grad():
+         logits_score = model_config["scoring_model"](**tokenized).logits[:, :-1]
+         if args.reference_model == args.scoring_model:
+             logits_ref = logits_score
+         else:
+             tokenized = model_config["reference_tokenizer"](text, return_tensors="pt",
+                                                             return_token_type_ids=False)
+             assert torch.all(tokenized.input_ids[:, 1:] == labels), "Tokenizer mismatch between reference and scoring models."
+             logits_ref = model_config["reference_model"](**tokenized).logits[:, :-1]
+         text_crit = get_sampling_discrepancy_analytic(logits_ref, logits_score, labels)
+
+     return text_crit
+
+ def load_jsonl(file_path):
+     out = []
+     with open(file_path, mode='r', encoding='utf-8') as jsonl_file:
+         for line in jsonl_file:
+             item = json.loads(line)
+             out.append(item)
+     print(f"Loaded {len(out)} examples from {file_path}")
+     return out
+
+ def dict2str(metrics):
+     out_str = ''
+     for key in metrics.keys():
+         out_str += f"{key}:{metrics[key]} "
+     return out_str
+
+ def experiment(args):
+     # Load reference and scoring models
+     logging.info(f"Loading reference model of type {args.reference_model}...")
+     reference_tokenizer = AutoTokenizer.from_pretrained(args.reference_model)
+     reference_model = AutoModelForCausalLM.from_pretrained(args.reference_model, device_map="auto")
+     reference_model.eval()
+     scoring_tokenizer = AutoTokenizer.from_pretrained(args.scoring_model)
+     scoring_model = AutoModelForCausalLM.from_pretrained(args.scoring_model, device_map="auto")
+     scoring_model.eval()
+
+     model_config = {
+         "reference_tokenizer": reference_tokenizer,
+         "reference_model": reference_model,
+         "scoring_tokenizer": scoring_tokenizer,
+         "scoring_model": scoring_model,
+     }
+
+     logging.info(f"Test in {args.test_data_path}")
+     test_data = load_jsonl(args.test_data_path)
+     random.seed(args.seed)
+     torch.manual_seed(args.seed)
+     np.random.seed(args.seed)
+     random.shuffle(test_data)
+     predictions = []
+     labels = []
+     st = time.time()
+     for i, item in tqdm(enumerate(test_data), total=len(test_data)):
+         if i >= 100:  # for debugging, only score the first 100 samples
+             break
+         text = item["text"]
+         label = item["label"]
+         src = item["src"]
+         text_crit = get_text_crit(text, args, model_config)
+         if text_crit is None or np.isnan(text_crit) or np.isinf(text_crit):
+             text_crit = 0
+         if 'human' in src:
+             labels.append(0)
+         else:
+             labels.append(1)
+         predictions.append(text_crit)
+     ed = time.time()
+     print(f"Average seconds per sample: {(ed - st) / max(len(predictions), 1):.3f}")
+     metric = evaluate_metrics(labels, predictions)
+     print(dict2str(metric))
+     with _LOG_PATH.open("a+", encoding="utf-8") as f:
+         f.write(f"Fast DetectGPT {args.test_data_path} {args.scoring_model} {args.reference_model}\n")
+         f.write(f"{dict2str(metric)}\n")
+
+
+ if __name__ == '__main__':
+     parser = argparse.ArgumentParser()
+     parser.add_argument('--test_data_path', type=str, default='/path/to/RealBench/Beemo/Llama_edited/test.jsonl',
+                         help="Path to the test data; may list several files separated by ','. "
+                              "Note that the data should have been perturbed.")
+     parser.add_argument('--reference_model', type=str, default="EleutherAI/gpt-neo-2.7B")
+     parser.add_argument('--scoring_model', type=str, default="EleutherAI/gpt-j-6B")
+     parser.add_argument('--DEVICE0', default="cuda:0", type=str, required=False)
+     parser.add_argument('--DEVICE1', default="cuda:1", type=str, required=False)
+     parser.add_argument('--seed', default=2023, type=int, required=False)
+     args = parser.parse_args()
+
+     experiment(args)
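
For context on `get_sampling_discrepancy_analytic` above: it is the analytic Fast-DetectGPT criterion, a z-score comparing the scoring model's log-likelihood of the observed tokens against the mean and variance it would expect if the reference model had sampled the tokens itself. A self-contained toy sketch of the same arithmetic (random logits and tiny shapes; illustrative only, not part of the commit):

# Toy sketch of the Fast-DetectGPT criterion with a shared reference/scoring model.
import torch

torch.manual_seed(0)
logits = torch.randn(1, 8, 100)                            # [batch, seq, vocab]
labels = torch.randint(0, 100, (1, 8))                     # observed token ids

lprobs = torch.log_softmax(logits, dim=-1)
probs = torch.softmax(logits, dim=-1)
ll = lprobs.gather(-1, labels.unsqueeze(-1)).squeeze(-1)   # log-likelihood of observed tokens
mu = (probs * lprobs).sum(-1)                              # expected log-prob under sampling
var = (probs * lprobs ** 2).sum(-1) - mu ** 2              # its per-position variance
crit = (ll.sum(-1) - mu.sum(-1)) / var.sum(-1).sqrt()      # z-score: observed vs. sampled
print(crit.item())                                         # higher values look more model-like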
detree/utils/detectors/MAGE.py ADDED
@@ -0,0 +1,384 @@
+ import argparse
+ import json
+ import logging
+ import random
+ import re
+ from itertools import chain
+ from pathlib import Path
+ from typing import Sequence
+
+ import numpy as np
+ import regex
+ import torch
+ from cleantext import clean
+ from tqdm import tqdm
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+ from ..utils import evaluate_metrics
+
+ _LOG_PATH = Path(__file__).resolve().parents[3] / "runs" / "val-other_detector.txt"
+ _LOG_PATH.parent.mkdir(parents=True, exist_ok=True)
+
+ class MosesPunctNormalizer:
+
+     EXTRA_WHITESPACE = [  # lines 21 - 30
+         (r"\r", r""),
+         (r"\(", r" ("),
+         (r"\)", r") "),
+         (r" +", r" "),
+         (r"\) ([.!:?;,])", r")\g<1>"),
+         (r"\( ", r"("),
+         (r" \)", r")"),
+         (r"(\d) %", r"\g<1>%"),
+         (r" :", r":"),
+         (r" ;", r";"),
+     ]
+
+     NORMALIZE_UNICODE_IF_NOT_PENN = [(r"`", r"'"), (r"''", r' " ')]  # lines 33 - 34
+
+     NORMALIZE_UNICODE = [  # lines 37 - 50
+         ("„", r'"'),
+         ("“", r'"'),
+         ("”", r'"'),
+         ("–", r"-"),
+         ("—", r" - "),
+         (r" +", r" "),
+         ("´", r"'"),
+         ("([a-zA-Z])‘([a-zA-Z])", r"\g<1>'\g<2>"),
+         ("([a-zA-Z])’([a-zA-Z])", r"\g<1>'\g<2>"),
+         ("‘", r"'"),
+         ("‚", r"'"),
+         ("’", r"'"),
+         (r"''", r'"'),
+         ("´´", r'"'),
+         ("…", r"..."),
+     ]
+
+     FRENCH_QUOTES = [  # lines 52 - 57
+         ("\u00A0«\u00A0", r'"'),
+         ("«\u00A0", r'"'),
+         ("«", r'"'),
+         ("\u00A0»\u00A0", r'"'),
+         ("\u00A0»", r'"'),
+         ("»", r'"'),
+     ]
+
+     HANDLE_PSEUDO_SPACES = [  # lines 59 - 67
+         ("\u00A0%", r"%"),
+         ("nº\u00A0", "nº "),
+         ("\u00A0:", r":"),
+         ("\u00A0ºC", " ºC"),
+         ("\u00A0cm", r" cm"),
+         ("\u00A0\\?", "?"),
+         ("\u00A0\\!", "!"),
+         ("\u00A0;", r";"),
+         (",\u00A0", r", "),
+         (r" +", r" "),
+     ]
+
+     EN_QUOTATION_FOLLOWED_BY_COMMA = [(r'"([,.]+)', r'\g<1>"')]
+
+     DE_ES_FR_QUOTATION_FOLLOWED_BY_COMMA = [
+         (r',"', r'",'),
+         (r'(\.+)"(\s*[^<])', r'"\g<1>\g<2>'),  # don't fix period at end of sentence
+     ]
+
+     DE_ES_CZ_CS_FR = [
+         ("(\\d)\u00A0(\\d)", r"\g<1>,\g<2>"),
+     ]
+
+     OTHER = [
+         ("(\\d)\u00A0(\\d)", r"\g<1>.\g<2>"),
+     ]
+
+     # Regex substitutions from replace-unicode-punctuation.perl
+     # https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/replace-unicode-punctuation.perl
+     REPLACE_UNICODE_PUNCTUATION = [
+         ("，", ","),
+         (r"。\s*", ". "),
+         ("、", ","),
+         ("”", '"'),
+         ("“", '"'),
+         ("∶", ":"),
+         ("：", ":"),
+         ("？", "?"),
+         ("《", '"'),
+         ("》", '"'),
+         ("）", ")"),
+         ("！", "!"),
+         ("（", "("),
+         ("；", ";"),
+         ("」", '"'),
+         ("「", '"'),
+         ("０", "0"),
+         ("１", "1"),
+         ("２", "2"),
+         ("３", "3"),
+         ("４", "4"),
+         ("５", "5"),
+         ("６", "6"),
+         ("７", "7"),
+         ("８", "8"),
+         ("９", "9"),
+         (r"．\s*", ". "),
+         ("～", "~"),
+         ("’", "'"),
+         ("…", "..."),
+         ("━", "-"),
+         ("〈", "<"),
+         ("〉", ">"),
+         ("【", "["),
+         ("】", "]"),
+         ("％", "%"),
+     ]
+
+     def __init__(
+         self,
+         lang="en",
+         penn=True,
+         norm_quote_commas=True,
+         norm_numbers=True,
+         pre_replace_unicode_punct=False,
+         post_remove_control_chars=False,
+     ):
+         """
+         :param lang: The two-letter language code.
+         :type lang: str
+         :param penn: Normalize Penn Treebank style quotations.
+         :type penn: bool
+         :param norm_quote_commas: Normalize quotations and commas
+         :type norm_quote_commas: bool
+         :param norm_numbers: Normalize numbers
+         :type norm_numbers: bool
+         """
+         self.substitutions = [
+             self.EXTRA_WHITESPACE,
+             self.NORMALIZE_UNICODE,
+             self.FRENCH_QUOTES,
+             self.HANDLE_PSEUDO_SPACES,
+         ]
+
+         if penn:  # Adds the penn substitutions after extra_whitespace regexes.
+             self.substitutions.insert(1, self.NORMALIZE_UNICODE_IF_NOT_PENN)
+
+         if norm_quote_commas:
+             if lang == "en":
+                 self.substitutions.append(self.EN_QUOTATION_FOLLOWED_BY_COMMA)
+             elif lang in ["de", "es", "fr"]:
+                 self.substitutions.append(self.DE_ES_FR_QUOTATION_FOLLOWED_BY_COMMA)
+
+         if norm_numbers:
+             if lang in ["de", "es", "cz", "cs", "fr"]:
+                 self.substitutions.append(self.DE_ES_CZ_CS_FR)
+             else:
+                 self.substitutions.append(self.OTHER)
+
+         self.substitutions = list(chain(*self.substitutions))
+
+         self.pre_replace_unicode_punct = pre_replace_unicode_punct
+         self.post_remove_control_chars = post_remove_control_chars
+
+     def normalize(self, text):
+         """
+         Returns a string with normalized punctuation.
+         """
+         # Optionally, replace unicode puncts BEFORE normalization.
+         if self.pre_replace_unicode_punct:
+             text = self.replace_unicode_punct(text)
+
+         # Actual normalization.
+         for regexp, substitution in self.substitutions:
+             text = re.sub(regexp, substitution, str(text))
+
+         # Optionally, remove control characters AFTER normalization.
+         if self.post_remove_control_chars:
+             text = self.remove_control_chars(text)
+
+         return text.strip()
+
+     def replace_unicode_punct(self, text):
+         for regexp, substitution in self.REPLACE_UNICODE_PUNCTUATION:
+             text = re.sub(regexp, substitution, str(text))
+         return text
+
+     def remove_control_chars(self, text):
+         return regex.sub(r"\p{C}", "", text)
+
+ def _tokenization_norm(text):
+     text = text.replace(
+         ' ,', ',').replace(
+         ' .', '.').replace(
+         ' ?', '?').replace(
+         ' !', '!').replace(
+         ' ;', ';').replace(
+         ' \'', '\'').replace(
+         ' ’ ', '\'').replace(
+         ' :', ':').replace(
+         '<newline>', '\n').replace(
+         '`` ', '"').replace(
+         ' \'\'', '"').replace(
+         '\'\'', '"').replace(
+         '.. ', '... ').replace(
+         ' )', ')').replace(
+         '( ', '(').replace(
+         ' n\'t', 'n\'t').replace(
+         ' i ', ' I ').replace(
+         ' i\'', ' I\'').replace(
+         '\\\'', '\'').replace(
+         '\n ', '\n').strip()
+     return text
+
+
+ def _clean_text(text):
+     # remove PLM special tokens
+     plm_special_tokens = r'(\<pad\>)|(\<s\>)|(\<\/s\>)|(\<unk\>)|(\<\|endoftext\|\>)'
+     text = re.sub(plm_special_tokens, "", text)
+
+     # normalize punctuation
+     moses_norm = MosesPunctNormalizer()
+     text = moses_norm.normalize(text)
+
+     # normalize tokenization
+     text = _tokenization_norm(text)
+
+     # remove specific text patterns, e.g., url, email and phone number
+     text = clean(text,
+                  fix_unicode=True,           # fix various unicode errors
+                  to_ascii=True,              # transliterate to closest ASCII representation
+                  lower=False,                # lowercase text
+                  no_line_breaks=True,        # fully strip line breaks as opposed to only normalizing them
+                  no_urls=True,               # replace all URLs with a special token
+                  no_emails=True,             # replace all email addresses with a special token
+                  no_phone_numbers=True,      # replace all phone numbers with a special token
+                  no_numbers=False,           # replace all numbers with a special token
+                  no_digits=False,            # replace all digits with a special token
+                  no_currency_symbols=False,  # replace all currency symbols with a special token
+                  no_punct=False,             # remove punctuation
+                  replace_with_punct="",      # instead of removing punctuation you may replace it
+                  replace_with_url="",
+                  replace_with_email="",
+                  replace_with_phone_number="",
+                  replace_with_number="<NUMBER>",
+                  replace_with_digit="<DIGIT>",
+                  replace_with_currency_symbol="<CUR>",
+                  lang="en"                   # set to 'de' for German special handling
+                  )
+
+     # keep common punctuation only
+     punct_pattern = r'[^ A-Za-z0-9.?!,:;\-\[\]\{\}\(\)\'\"]'
+     text = re.sub(punct_pattern, '', text)
+     # remove specific patterns
+     spe_pattern = r'[-\[\]\{\}\(\)\'\"]{2,}'
+     text = re.sub(spe_pattern, '', text)
+     # remove redundant spaces
+     text = " ".join(text.split())
+     return text
+
+ def _rm_line_break(text):
+     text = text.replace("\n", "\\n")
+     text = re.sub(r'(?:\\n)*\\n', r'\\n', text)
+     text = re.sub(r'^.{0,3}\\n', '', text)
+     text = text.replace("\\n", " ")
+     return text
+
+ def preprocess(text):
+     text = _rm_line_break(text)
+     text = _clean_text(text)
+     return text
+
+
+ def detect(input_text, tokenizer, model, device='cuda', th=-3.08583984375):
+     # Tokenize input text
+     tokenize_input = tokenizer(input_text, padding=True, truncation=True, max_length=512, return_tensors="pt")
+     tensor_input = tokenize_input["input_ids"].to(device)
+
+     # Get model output
+     with torch.no_grad():
+         outputs = model(tensor_input)
+
+     # Score: negated logit of class 0 (machine-generated); higher means more human-like.
+     # MAGE predicts machine-generated when the score falls below `th`.
+     score = -outputs.logits[0][0].item()
+
+     return score
+
+ def load_jsonl(file_path):
+     out = []
+     with open(file_path, mode='r', encoding='utf-8') as jsonl_file:
+         for line in jsonl_file:
+             item = json.loads(line)
+             out.append(item)
+     print(f"Loaded {len(out)} examples from {file_path}")
+     return out
+
+ def dict2str(metrics):
+     out_str = ''
+     for key in metrics.keys():
+         out_str += f"{key}:{metrics[key]} "
+     return out_str
+
+ def experiment(args):
+     # Initialize MAGE model
+     model_dir = "yaful/MAGE"
+     tokenizer = AutoTokenizer.from_pretrained(model_dir)
+     model = AutoModelForSequenceClassification.from_pretrained(model_dir).cuda()
+
+     logging.info(f"Test in {args.test_data_path}")
+     test_data = load_jsonl(args.test_data_path)
+
+     random.seed(args.seed)
+     torch.manual_seed(args.seed)
+     np.random.seed(args.seed)
+     random.shuffle(test_data)
+
+     predictions = []
+     labels = []
+
+     for i, item in tqdm(enumerate(test_data), total=len(test_data)):
+         text = item["text"]
+         label = item["label"]
+         src = item["src"]
+
+         # preprocess the text
+         text = preprocess(text)
+
+         # MAGE detection
+         score = detect(text, tokenizer, model)
+
+         # Higher score means more human-like, so human sources get label 1 here.
+         if 'human' in src:
+             labels.append(1)
+         else:
+             labels.append(0)
+
+         predictions.append(score)
+
+     # Compute metrics
+     metric = evaluate_metrics(labels, predictions)
+     print(dict2str(metric))
+
+     # Save results
+     with _LOG_PATH.open("a+", encoding="utf-8") as f:
+         f.write(f"MAGE {args.test_data_path}\n")
+         f.write(f"{dict2str(metric)}\n")
+
+ def build_argument_parser() -> argparse.ArgumentParser:
+     parser = argparse.ArgumentParser()
+     parser.add_argument(
+         '--test_data_path',
+         type=str,
+         default='/path/to/RealBench/DetectRL/Multi_Attack/all_attacks_llm_test.jsonl',
+         help="Path to the test data; may list several files separated by ','. Note that the data should have been perturbed.",
+     )
+     parser.add_argument('--seed', default=2023, type=int, required=False)
+     return parser
+
+
+ def main(argv: Sequence[str] | None = None) -> None:
+     parser = build_argument_parser()
+     args = parser.parse_args(argv)
+     experiment(args)
+
+
+ if __name__ == '__main__':
+     main()
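
Taken together, `preprocess` and `detect` form the single-text path through this file. A minimal usage sketch (assumes a CUDA device, that `yaful/MAGE` downloads from the Hub, and an arbitrary sample text):

# Minimal usage sketch for the MAGE detector defined above.
from transformers import AutoModelForSequenceClassification, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("yaful/MAGE")
model = AutoModelForSequenceClassification.from_pretrained("yaful/MAGE").cuda()

text = preprocess("Some passage to check...\nwith stray line breaks and  extra   spaces.")
score = detect(text, tokenizer, model)  # higher = more human-like
print("machine-generated" if score < -3.08583984375 else "human-written")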
detree/utils/detectors/RADAR.py ADDED
@@ -0,0 +1,100 @@
+ import argparse
+ import json
+ import logging
+ import random
+ from pathlib import Path
+ from typing import Sequence
+
+ import numpy as np
+ import torch
+ import torch.nn.functional as F
+ import transformers
+ from tqdm import tqdm
+
+ from ..utils import evaluate_metrics
+
+ _LOG_PATH = Path(__file__).resolve().parents[3] / "runs" / "val-other_detector.txt"
+ _LOG_PATH.parent.mkdir(parents=True, exist_ok=True)
+
+ def load_jsonl(file_path):
+     out = []
+     with open(file_path, mode='r', encoding='utf-8') as jsonl_file:
+         for line in jsonl_file:
+             item = json.loads(line)
+             out.append(item)
+     print(f"Loaded {len(out)} examples from {file_path}")
+     return out
+
+ def dict2str(metrics):
+     out_str = ''
+     for key in metrics.keys():
+         out_str += f"{key}:{metrics[key]} "
+     return out_str
+
+ def experiment(args):
+     # Initialize RADAR detector model
+     detector = transformers.AutoModelForSequenceClassification.from_pretrained("TrustSafeAI/RADAR-Vicuna-7B", device_map="auto")
+     tokenizer = transformers.AutoTokenizer.from_pretrained("TrustSafeAI/RADAR-Vicuna-7B")
+     detector.eval()
+
+     logging.info(f"Test in {args.test_data_path}")
+     test_data = load_jsonl(args.test_data_path)
+
+     random.seed(args.seed)
+     torch.manual_seed(args.seed)
+     np.random.seed(args.seed)
+     random.shuffle(test_data)
+
+     predictions = []
+     labels = []
+
+     for i, item in tqdm(enumerate(test_data), total=len(test_data)):
+         text = item["text"]
+         label = item["label"]
+         src = item["src"]
+
+         # Tokenize input text
+         inputs = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors="pt")
+         inputs = {k: v.cuda() for k, v in inputs.items()}
+
+         # Get model output
+         with torch.no_grad():
+             output_probs = F.log_softmax(detector(**inputs).logits, -1)[:, 0].exp().tolist()
+
+         # Determine the label and append to predictions and labels
+         if 'human' in src:
+             labels.append(0)
+         else:
+             labels.append(1)
+
+         predictions.append(output_probs[0])  # probability of the AI-generated class
+
+     # Compute metrics
+     metric = evaluate_metrics(labels, predictions)
+     print(dict2str(metric))
+
+     # Save results
+     with _LOG_PATH.open("a+", encoding="utf-8") as f:
+         f.write(f"RADAR {args.test_data_path}\n")
+         f.write(f"{dict2str(metric)}\n")
+
+ def build_argument_parser() -> argparse.ArgumentParser:
+     parser = argparse.ArgumentParser()
+     parser.add_argument(
+         '--test_data_path',
+         type=str,
+         default='/path/to/RealBench/DetectRL/Multi_Domain/all_multi_domains_test.jsonl',
+         help="Path to the test data; may list several files separated by ','. Note that the data should have been perturbed.",
+     )
+     parser.add_argument('--seed', default=2023, type=int, required=False)
+     return parser
+
+
+ def main(argv: Sequence[str] | None = None) -> None:
+     parser = build_argument_parser()
+     args = parser.parse_args(argv)
+     experiment(args)
+
+
+ if __name__ == '__main__':
+     main()
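
The per-text scoring inside `experiment` mirrors RADAR's published usage; a minimal single-text sketch under the same assumptions (a CUDA device and a Hub download, with an arbitrary sample text):

# Minimal single-text sketch of the RADAR scoring used above.
import torch
import torch.nn.functional as F
import transformers

detector = transformers.AutoModelForSequenceClassification.from_pretrained("TrustSafeAI/RADAR-Vicuna-7B", device_map="auto")
tokenizer = transformers.AutoTokenizer.from_pretrained("TrustSafeAI/RADAR-Vicuna-7B")
detector.eval()

inputs = tokenizer(["Some passage to check."], padding=True, truncation=True, max_length=512, return_tensors="pt")
inputs = {k: v.cuda() for k, v in inputs.items()}
with torch.no_grad():
    p_ai = F.log_softmax(detector(**inputs).logits, -1)[:, 0].exp().tolist()
print(p_ai[0])  # probability that the text is AI-generated (class 0)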
detree/utils/detectors/UAR_eval.py ADDED
@@ -0,0 +1,126 @@
+ import argparse
+ import json
+ import os
+ from pathlib import Path
+ from typing import Sequence
+
+ import numpy as np
+ import torch
+ import torch.nn.functional as F
+ from torch.utils.data import DataLoader, Dataset
+ from tqdm import tqdm
+ from transformers import AutoModel, AutoTokenizer
+
+ from ..utils import evaluate_metrics
+
+ np.random.seed(43)
+ os.environ["TOKENIZERS_PARALLELISM"] = "true"
+
+ _LOG_PATH = Path(__file__).resolve().parents[3] / "runs" / "val-other_detector.txt"
+ _LOG_PATH.parent.mkdir(parents=True, exist_ok=True)
+
+ class PassagesDataset(Dataset):
+     def __init__(self, data):
+         self.passages = data
+
+     def __len__(self):
+         return len(self.passages)
+
+     def __getitem__(self, idx):
+         data_now = self.passages[idx]
+         text = data_now['text']
+         label = int(data_now['label']) == 0
+         ids = data_now['id']
+         return text, int(label), int(ids)
+
+ def load_jsonl(file_path, need_human=True):
+     out = []
+     with open(file_path, mode='r', encoding='utf-8') as jsonl_file:
+         for line in jsonl_file:
+             item = json.loads(line)
+             if item['src'] == 'human' and not need_human:
+                 continue
+             out.append(item)
+     print(f"Loaded {len(out)} examples from {file_path}")
+     return out
+
+ def dict2str(metrics):
+     out_str = ''
+     for key in metrics.keys():
+         out_str += f"{key}:{metrics[key]} "
+     return out_str
+
+ def gen_embeddings(data, model, tokenizer):
+     device = torch.device("cuda")
+     dataset = PassagesDataset(data)
+     dataloader = DataLoader(dataset, batch_size=32, num_workers=8, shuffle=False)
+     labels, embeddings = [], []
+     with torch.no_grad():
+         for batch in tqdm(dataloader, total=len(dataloader)):
+             texts, label, ids = batch
+             encoded_batch = tokenizer.batch_encode_plus(
+                 texts,
+                 return_tensors="pt",
+                 max_length=512,
+                 padding='max_length',
+                 truncation=True,
+             )
+             for key in encoded_batch:
+                 # LUAR expects (batch, episode, tokens), hence the extra dimension.
+                 encoded_batch[key] = encoded_batch[key].unsqueeze(1).to(device)
+
+             now_embeddings = model(**encoded_batch)
+             now_embeddings = F.normalize(now_embeddings, p=2, dim=-1)
+             embeddings.append(now_embeddings.cpu())
+             labels.append(label.cpu())
+     labels = torch.cat(labels, dim=0).numpy()
+     embeddings = torch.cat(embeddings, dim=0).numpy()
+
+     return embeddings, labels
+
+ def run(opt):
+     device = torch.device("cuda")
+     model = AutoModel.from_pretrained("rrivera1849/LUAR-CRUD", trust_remote_code=True)
+     model.to(device)
+     model.eval()
+     tokenizer = AutoTokenizer.from_pretrained("rrivera1849/LUAR-CRUD")
+     database_data = load_jsonl(opt.database_path, need_human=False)
+     test_data = load_jsonl(opt.test_dataset_path)
+     print("Database Data Size:", len(database_data), "Test Data Size:", len(test_data))
+     database_embeddings, database_labels = gen_embeddings(database_data, model, tokenizer)
+     test_embeddings, test_labels = gen_embeddings(test_data, model, tokenizer)
+     dis = test_embeddings @ database_embeddings.T
+     dis = dis.min(axis=1)
+     metric = evaluate_metrics(test_labels, dis)
+     print(dict2str(metric))
+     with _LOG_PATH.open("a+", encoding="utf-8") as f:
+         f.write(f"UAR {opt.test_dataset_path}\n")
+         f.write(f"{dict2str(metric)}\n")
+
+ def build_argument_parser() -> argparse.ArgumentParser:
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--database_path", type=str, default="/path/to/RealBench/MAGE_Unseen/Unseen/5shot/train_0.jsonl")
+     parser.add_argument("--test_dataset_path", type=str, default="/path/to/RealBench/MAGE_Unseen/Unseen/5shot/test_0.jsonl")
+     return parser
+
+
+ def main(argv: Sequence[str] | None = None) -> None:
+     parser = build_argument_parser()
+     opt = parser.parse_args(argv)
+     run(opt)
+
+
+ if __name__ == "__main__":
+     main()
+     # Example of embedding raw texts directly with the LUAR model:
+     # text = ['The quick brown fox jumps over the lazy dog.', 'There is a cat on the roof.']
+     # encoded_batch = tokenizer.batch_encode_plus(
+     #     text,
+     #     return_tensors="pt",
+     #     max_length=512,
+     #     padding='max_length',
+     #     truncation=True,
+     # )
+     # for key in encoded_batch:
+     #     encoded_batch[key] = encoded_batch[key].unsqueeze(1).to(device)
+
+     # with torch.no_grad():
+     #     embeddings = model(**encoded_batch)
+     # print(embeddings.shape)
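
The retrieval step in `run` boils down to a similarity matrix between L2-normalized embeddings. A small NumPy sketch of just that scoring (random toy vectors standing in for LUAR outputs):

# Toy sketch of the similarity-based scoring in run().
import numpy as np

test_emb = np.random.randn(4, 8)
test_emb /= np.linalg.norm(test_emb, axis=1, keepdims=True)
db_emb = np.random.randn(10, 8)
db_emb /= np.linalg.norm(db_emb, axis=1, keepdims=True)

sims = test_emb @ db_emb.T   # cosine similarity of each test text to each database text
scores = sims.min(axis=1)    # the score used in run(): minimum similarity to the database
print(scores.shape)          # (4,)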
detree/utils/detectors/binoculars_detector.py ADDED
@@ -0,0 +1,166 @@
+ import os
+ from typing import Union
+
+ import numpy as np
+ import torch
+ import transformers
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ ce_loss_fn = torch.nn.CrossEntropyLoss(reduction="none")
+ softmax_fn = torch.nn.Softmax(dim=-1)
+
+ torch.set_grad_enabled(False)
+
+ huggingface_config = {
+     # Only required for private models from Huggingface (e.g. LLaMA models)
+     "TOKEN": os.environ.get("HF_TOKEN", None)
+ }
+
+ # selected using Falcon-7B and Falcon-7B-Instruct at bfloat16
+ BINOCULARS_ACCURACY_THRESHOLD = 0.9015310749276843  # optimized for f1-score
+ BINOCULARS_FPR_THRESHOLD = 0.8536432310785527  # optimized for low-fpr [chosen at 0.01%]
+
+ DEVICE_1 = "cuda:0" if torch.cuda.is_available() else "cpu"
+ DEVICE_2 = "cuda:1" if torch.cuda.device_count() > 1 else DEVICE_1
+
+
+ def assert_tokenizer_consistency(model_id_1, model_id_2):
+     identical_tokenizers = (
+         AutoTokenizer.from_pretrained(model_id_1).vocab
+         == AutoTokenizer.from_pretrained(model_id_2).vocab
+     )
+     if not identical_tokenizers:
+         raise ValueError(f"Tokenizers are not identical for {model_id_1} and {model_id_2}.")
+
+
+ def perplexity(encoding: transformers.BatchEncoding,
+                logits: torch.Tensor,
+                median: bool = False,
+                temperature: float = 1.0):
+     shifted_logits = logits[..., :-1, :].contiguous() / temperature
+     shifted_labels = encoding.input_ids[..., 1:].contiguous()
+     shifted_attention_mask = encoding.attention_mask[..., 1:].contiguous()
+
+     if median:
+         ce_nan = (ce_loss_fn(shifted_logits.transpose(1, 2), shifted_labels).
+                   masked_fill(~shifted_attention_mask.bool(), float("nan")))
+         ppl = np.nanmedian(ce_nan.cpu().float().numpy(), 1)
+     else:
+         ppl = (ce_loss_fn(shifted_logits.transpose(1, 2), shifted_labels) *
+                shifted_attention_mask).sum(1) / shifted_attention_mask.sum(1)
+         ppl = ppl.to("cpu").float().numpy()
+
+     return ppl
+
+
+ def entropy(p_logits: torch.Tensor,
+             q_logits: torch.Tensor,
+             encoding: transformers.BatchEncoding,
+             pad_token_id: int,
+             median: bool = False,
+             sample_p: bool = False,
+             temperature: float = 1.0):
+     vocab_size = p_logits.shape[-1]
+     total_tokens_available = q_logits.shape[-2]
+     p_scores, q_scores = p_logits / temperature, q_logits / temperature
+
+     p_proba = softmax_fn(p_scores).view(-1, vocab_size)
+
+     if sample_p:
+         p_proba = torch.multinomial(p_proba.view(-1, vocab_size), replacement=True, num_samples=1).view(-1)
+
+     q_scores = q_scores.view(-1, vocab_size)
+
+     ce = ce_loss_fn(input=q_scores, target=p_proba).view(-1, total_tokens_available)
+     padding_mask = (encoding.input_ids != pad_token_id).type(torch.uint8)
+
+     if median:
+         ce_nan = ce.masked_fill(~padding_mask.bool(), float("nan"))
+         agg_ce = np.nanmedian(ce_nan.cpu().float().numpy(), 1)
+     else:
+         agg_ce = (((ce * padding_mask).sum(1) / padding_mask.sum(1)).to("cpu").float().numpy())
+
+     return agg_ce
+
+
+ class Binoculars:
+     def __init__(self,
+                  observer_name_or_path: str = "tiiuae/falcon-7b",
+                  performer_name_or_path: str = "tiiuae/falcon-7b-instruct",
+                  use_bfloat16: bool = True,
+                  max_token_observed: int = 512,
+                  mode: str = "low-fpr",
+                  ) -> None:
+         assert_tokenizer_consistency(observer_name_or_path, performer_name_or_path)
+
+         self.change_mode(mode)
+         self.observer_model = AutoModelForCausalLM.from_pretrained(observer_name_or_path,
+                                                                    device_map={"": DEVICE_1},
+                                                                    trust_remote_code=True,
+                                                                    torch_dtype=torch.bfloat16 if use_bfloat16
+                                                                    else torch.float32,
+                                                                    token=huggingface_config["TOKEN"]
+                                                                    )
+         self.performer_model = AutoModelForCausalLM.from_pretrained(performer_name_or_path,
+                                                                     device_map={"": DEVICE_2},
+                                                                     trust_remote_code=True,
+                                                                     torch_dtype=torch.bfloat16 if use_bfloat16
+                                                                     else torch.float32,
+                                                                     token=huggingface_config["TOKEN"]
+                                                                     )
+         self.observer_model.eval()
+         self.performer_model.eval()
+
+         self.tokenizer = AutoTokenizer.from_pretrained(observer_name_or_path)
+         if not self.tokenizer.pad_token:
+             self.tokenizer.pad_token = self.tokenizer.eos_token
+         self.max_token_observed = max_token_observed
+
+     def change_mode(self, mode: str) -> None:
+         if mode == "low-fpr":
+             self.threshold = BINOCULARS_FPR_THRESHOLD
+         elif mode == "accuracy":
+             self.threshold = BINOCULARS_ACCURACY_THRESHOLD
+         else:
+             raise ValueError(f"Invalid mode: {mode}")
+
+     def _tokenize(self, batch: list[str]) -> transformers.BatchEncoding:
+         batch_size = len(batch)
+         encodings = self.tokenizer(
+             batch,
+             return_tensors="pt",
+             padding="longest" if batch_size > 1 else False,
+             truncation=True,
+             max_length=self.max_token_observed,
+             return_token_type_ids=False).to(self.observer_model.device)
+         return encodings
+
+     @torch.inference_mode()
+     def _get_logits(self, encodings: transformers.BatchEncoding) -> tuple[torch.Tensor, torch.Tensor]:
+         observer_logits = self.observer_model(**encodings.to(DEVICE_1)).logits
+         performer_logits = self.performer_model(**encodings.to(DEVICE_2)).logits
+         if DEVICE_1 != "cpu":
+             torch.cuda.synchronize()
+         return observer_logits, performer_logits
+
+     def compute_score(self, input_text: Union[list[str], str]) -> Union[float, list[float]]:
+         batch = [input_text] if isinstance(input_text, str) else input_text
+         encodings = self._tokenize(batch)
+         observer_logits, performer_logits = self._get_logits(encodings)
+         ppl = perplexity(encodings, performer_logits)
+         x_ppl = entropy(observer_logits.to(DEVICE_1), performer_logits.to(DEVICE_1),
+                         encodings.to(DEVICE_1), self.tokenizer.pad_token_id)
+         binoculars_scores = ppl / x_ppl
+         binoculars_scores = binoculars_scores.tolist()
+         return binoculars_scores[0] if isinstance(input_text, str) else binoculars_scores
+
+     def predict(self, input_text: Union[list[str], str]) -> Union[list[str], str]:
+         binoculars_scores = np.array(self.compute_score(input_text))
+         pred = np.where(binoculars_scores < self.threshold,
+                         "Most likely AI-generated",
+                         "Most likely human-generated"
+                         ).tolist()
+         return pred
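
A minimal usage sketch for the class above (assumes enough GPU memory for the two Falcon-7B checkpoints; the sample text is arbitrary):

# Minimal usage sketch for the Binoculars scorer defined above.
bino = Binoculars(mode="low-fpr")                      # or mode="accuracy"
score = bino.compute_score("Some passage to check.")   # perplexity / cross-perplexity
label = bino.predict("Some passage to check.")         # thresholded string label
print(score, label)                                    # lower scores lean AI-generated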
detree/utils/detectors/binoculars_evaluation.py ADDED
@@ -0,0 +1,89 @@
+ import argparse
+ import json
+ import logging
+ import random
+ from pathlib import Path
+ from typing import Sequence
+
+ import numpy as np
+ import torch
+ from tqdm import tqdm
+
+ from .binoculars_detector import Binoculars
+ from ..utils import evaluate_metrics
+
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+
+ _LOG_PATH = Path(__file__).resolve().parents[3] / "runs" / "val-other_detector.txt"
+ _LOG_PATH.parent.mkdir(parents=True, exist_ok=True)
+
+ def load_jsonl(file_path):
+     out = []
+     with open(file_path, mode='r', encoding='utf-8') as jsonl_file:
+         for line in jsonl_file:
+             item = json.loads(line)
+             out.append(item)
+     print(f"Loaded {len(out)} examples from {file_path}")
+     return out
+
+ def dict2str(metrics):
+     out_str = ''
+     for key in metrics.keys():
+         out_str += f"{key}:{metrics[key]} "
+     return out_str
+
+ def experiment(args):
+     # Initialize Binoculars (experiments in the paper use the "accuracy" mode threshold wherever applicable)
+     bino = Binoculars(mode="accuracy", max_token_observed=args.tokens_seen)
+
+     logging.info(f"Test in {args.test_data_path}")
+     test_data = load_jsonl(args.test_data_path)
+
+     random.seed(args.seed)
+     torch.manual_seed(args.seed)
+     np.random.seed(args.seed)
+     random.shuffle(test_data)
+     predictions = []
+     labels = []
+     for i, item in tqdm(enumerate(test_data), total=len(test_data)):
+         text = item["text"]
+         label = item["label"]
+         src = item["src"]
+         # Negate so that higher scores indicate AI-generated text.
+         bino_score = -bino.compute_score(text)
+
+         if bino_score is None or np.isnan(bino_score) or np.isinf(bino_score):
+             bino_score = 0
+         if 'human' in src:
+             labels.append(0)
+         else:
+             labels.append(1)
+         predictions.append(bino_score)
+     metric = evaluate_metrics(labels, predictions)
+     print(dict2str(metric))
+     with _LOG_PATH.open("a+", encoding="utf-8") as f:
+         f.write(f"binoculars {args.test_data_path}\n")
+         f.write(f"{dict2str(metric)}\n")
+
+
+ def build_argument_parser() -> argparse.ArgumentParser:
+     parser = argparse.ArgumentParser()
+     parser.add_argument(
+         '--test_data_path',
+         type=str,
+         default='/path/to/RealBench/Deepfake/no_attack/test.jsonl',
+         help="Path to the test data; may list several files separated by ','. Note that the data should have been perturbed.",
+     )
+     parser.add_argument("--tokens_seen", type=int, default=512, help="Number of tokens seen by the model")
+     parser.add_argument('--DEVICE', default="cuda", type=str, required=False)
+     parser.add_argument('--seed', default=2023, type=int, required=False)
+     return parser
+
+
+ def main(argv: Sequence[str] | None = None) -> None:
+     parser = build_argument_parser()
+     args = parser.parse_args(argv)
+     experiment(args)
+
+
+ if __name__ == '__main__':
+     main()
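
Because of the relative imports (`from ..utils import evaluate_metrics`), these evaluation scripts are meant to run as modules from the repository root; the argv hook on `main` also allows a programmatic call, e.g. (the path is a placeholder, as in the defaults):

# Programmatic invocation sketch: argv is forwarded straight to argparse.
main([
    "--test_data_path", "/path/to/RealBench/Deepfake/no_attack/test.jsonl",
    "--tokens_seen", "512",
    "--seed", "2023",
])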