File size: 4,553 Bytes
d389c0e
 
 
 
 
 
 
 
 
 
 
 
 
1008577
 
 
 
6c92a20
d389c0e
 
 
 
 
 
 
1008577
b178707
d389c0e
 
 
1008577
 
 
 
 
 
 
6c92a20
 
 
 
 
 
 
b178707
 
 
 
 
 
 
 
 
 
6c92a20
 
d389c0e
 
 
 
 
 
 
 
 
 
1008577
 
 
 
 
 
 
 
d389c0e
 
 
 
 
 
 
 
6c92a20
d389c0e
 
 
 
6c92a20
 
d389c0e
 
 
 
 
 
 
6c92a20
d389c0e
 
 
 
c939101
d389c0e
c939101
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
from typing import Union

import numpy as np
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer

from config import huggingface_config
from .utils import assert_tokenizer_consistency
from .metrics import perplexity, entropy

# Import-time side effect: switch off autograd. The code in this module only
# runs forward passes, so no gradients are ever needed.
torch.set_grad_enabled(False)

# Decision thresholds applied to the Binoculars score: a score strictly below
# the active threshold is labelled AI-generated (see Binoculars.predict).
# selected using Falcon-7B and Falcon-7B-Instruct at bfloat16
BINOCULARS_ACCURACY_THRESHOLD = 0.9015310749276843  # optimized for f1-score
BINOCULARS_FPR_THRESHOLD = 0.8536432310785527  # optimized for low-fpr

# "cuda" when a CUDA device is available, otherwise "cpu".
# NOTE(review): not referenced by the code visible in this file (the models are
# placed with device_map="auto"); kept in case other modules import it.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

class Binoculars(object):
    """Detect machine-generated text with an observer/performer pair of causal LMs.

    The score of a text is ``perplexity / entropy`` as computed by the helpers
    in ``.metrics`` from the logits of the two models; a score strictly below
    the active threshold is labelled AI-generated.
    """

    def __init__(self,
                 observer_name_or_path: str = "tiiuae/falcon-7b",
                 performer_name_or_path: str = "tiiuae/falcon-7b-instruct",
                 use_bfloat16: bool = True,
                 max_token_observed: int = 512,
                 mode: str = "low-fpr",
                 quantize: bool = True
                 ) -> None:
        """Load both models and the shared tokenizer.

        Args:
            observer_name_or_path: Hub id or local path of the observer model.
            performer_name_or_path: Hub id or local path of the performer model.
            use_bfloat16: When not quantizing, load weights as bfloat16
                (otherwise float32).
            max_token_observed: Inputs are truncated to this many tokens.
            mode: ``"low-fpr"`` or ``"accuracy"``; selects the threshold.
            quantize: Load the models in 8-bit via bitsandbytes when it is
                installed; otherwise fall back to the unquantized dtype.

        Raises:
            ValueError: If ``mode`` is not a recognised mode.
        """
        assert_tokenizer_consistency(observer_name_or_path, performer_name_or_path)

        # Single source of truth for mode validation / threshold selection.
        self.change_mode(mode)

        # Load models with memory-efficient settings
        model_kwargs = {
            "device_map": "auto",
            "trust_remote_code": True,
            "token": huggingface_config["TOKEN"],
        }

        if quantize:
            # Only probe for the optional dependency inside the try block.
            try:
                import bitsandbytes  # noqa: F401
            except ImportError:
                print("bitsandbytes not available. Falling back to full precision.")
                quantize = False

        if quantize:
            # Passing load_in_8bit directly to from_pretrained is deprecated;
            # the quantization settings go through a BitsAndBytesConfig.
            model_kwargs["quantization_config"] = transformers.BitsAndBytesConfig(load_in_8bit=True)
        else:
            model_kwargs["torch_dtype"] = torch.bfloat16 if use_bfloat16 else torch.float32

        self.observer_model = AutoModelForCausalLM.from_pretrained(observer_name_or_path, **model_kwargs)
        self.performer_model = AutoModelForCausalLM.from_pretrained(performer_name_or_path, **model_kwargs)

        self.observer_model.eval()
        self.performer_model.eval()

        # Use the same Hub token as for the models so gated/private
        # checkpoints can also resolve their tokenizer files.
        self.tokenizer = AutoTokenizer.from_pretrained(observer_name_or_path,
                                                       token=huggingface_config["TOKEN"])
        if not self.tokenizer.pad_token:
            # Batched padding needs a pad token; reuse EOS when none is defined.
            self.tokenizer.pad_token = self.tokenizer.eos_token

        self.max_token_observed = max_token_observed

    def change_mode(self, mode: str) -> None:
        """Select the decision threshold for ``mode``.

        Args:
            mode: ``"low-fpr"`` or ``"accuracy"``.

        Raises:
            ValueError: If ``mode`` is neither of the supported values.
        """
        if mode == "low-fpr":
            self.threshold = BINOCULARS_FPR_THRESHOLD
        elif mode == "accuracy":
            self.threshold = BINOCULARS_ACCURACY_THRESHOLD
        else:
            raise ValueError(f"Invalid mode: {mode}")

    def _tokenize(self, batch: list[str]) -> "transformers.BatchEncoding":
        """Tokenize ``batch`` into PyTorch tensors placed on the observer model's device.

        Sequences are truncated to ``max_token_observed`` tokens and, when the
        batch holds more than one text, padded to the longest sequence.
        """
        batch_size = len(batch)
        encodings = self.tokenizer(
            batch,
            return_tensors="pt",
            padding="longest" if batch_size > 1 else False,
            truncation=True,
            max_length=self.max_token_observed,
            return_token_type_ids=False)
        # The tokenizer yields CPU tensors while device_map="auto" usually puts
        # the weights on an accelerator; move the inputs next to the model so
        # the forward pass and the metric helpers see a single device.
        device = torch.device(self.observer_model.device)
        # NOTE(review): offloaded weights are expected to report a "meta"
        # device, which cannot hold real inputs - leave the tensors as they
        # are in that case. Confirm against the accelerate version in use.
        if device.type != "meta":
            encodings = encodings.to(device)
        return encodings

    @torch.inference_mode()
    def _get_logits(self, encodings: "transformers.BatchEncoding") -> tuple[torch.Tensor, torch.Tensor]:
        """Run both models on ``encodings`` and return ``(observer_logits, performer_logits)``."""
        observer_logits = self.observer_model(**encodings).logits
        performer_logits = self.performer_model(**encodings).logits
        return observer_logits, performer_logits

    def compute_score(self, input_text: Union[list[str], str]) -> Union[float, list[float]]:
        """Compute the Binoculars score for one text or a batch of texts.

        Args:
            input_text: A single string or a list of strings.

        Returns:
            A float for a single string, otherwise a list with one score per
            text (an empty list for an empty batch).
        """
        is_single = isinstance(input_text, str)
        batch = [input_text] if is_single else list(input_text)
        if not batch:
            # Nothing to score; avoid calling the tokenizer on an empty batch.
            return []
        encodings = self._tokenize(batch)
        observer_logits, performer_logits = self._get_logits(encodings)
        ppl = perplexity(encodings, performer_logits)
        x_ppl = entropy(observer_logits, performer_logits, encodings, self.tokenizer.pad_token_id)
        binoculars_scores = (ppl / x_ppl).tolist()
        return binoculars_scores[0] if is_single else binoculars_scores

    def predict(self, input_text: Union[list[str], str]) -> Union[str, list[str]]:
        """Label text(s) as AI- or human-generated using the active threshold.

        Args:
            input_text: A single string or a list of strings.

        Returns:
            One label for a single string, otherwise a list of labels in input
            order. A score strictly below ``self.threshold`` yields
            ``"Most likely AI-generated"``.
        """
        binoculars_scores = np.array(self.compute_score(input_text))
        # np.where handles the 0-d (single text) and 1-d (batch) cases alike;
        # tolist() turns the result back into a plain str / list of str.
        return np.where(binoculars_scores < self.threshold,
                        "Most likely AI-generated",
                        "Most likely human-generated").tolist()