Spaces:
Runtime error
Runtime error
File size: 4,553 Bytes
d389c0e 1008577 6c92a20 d389c0e 1008577 b178707 d389c0e 1008577 6c92a20 b178707 6c92a20 d389c0e 1008577 d389c0e 6c92a20 d389c0e 6c92a20 d389c0e 6c92a20 d389c0e c939101 d389c0e c939101 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
from typing import Union
import numpy as np
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from config import huggingface_config
from .utils import assert_tokenizer_consistency
from .metrics import perplexity, entropy
# Import-time side effect: this module only runs inference, so gradient
# tracking is switched off as soon as the module is imported.
torch.set_grad_enabled(False)
# Decision thresholds for the Binoculars score; a score below the active
# threshold is reported as AI-generated.
# selected using Falcon-7B and Falcon-7B-Instruct at bfloat16
BINOCULARS_ACCURACY_THRESHOLD = 0.9015310749276843 # optimized for f1-score
BINOCULARS_FPR_THRESHOLD = 0.8536432310785527 # optimized for low-fpr
# Preferred torch device string: CUDA when a GPU is visible, otherwise CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
class Binoculars:
    """Classify text as AI- or human-generated using two causal language models.

    An "observer" and a "performer" model (which must share a tokenizer) are
    run over the same tokenized text. The score for each text is
    ``perplexity(encodings, performer_logits)`` divided by
    ``entropy(observer_logits, performer_logits, encodings, pad_token_id)``;
    both helpers come from ``.metrics``. A score below the active threshold is
    labelled AI-generated.
    """

    # User-facing labels returned by ``predict``.
    _AI_LABEL = "Most likely AI-generated"
    _HUMAN_LABEL = "Most likely human-generated"

    def __init__(self,
                 observer_name_or_path: str = "tiiuae/falcon-7b",
                 performer_name_or_path: str = "tiiuae/falcon-7b-instruct",
                 use_bfloat16: bool = True,
                 max_token_observed: int = 512,
                 mode: str = "low-fpr",
                 quantize: bool = True
                 ) -> None:
        """Load both models and the shared tokenizer.

        Args:
            observer_name_or_path: Hub id or local path of the observer model.
            performer_name_or_path: Hub id or local path of the performer model.
            use_bfloat16: When not quantizing, load weights as bfloat16
                instead of float32.
            max_token_observed: Truncation length (in tokens) for every input.
            mode: ``"low-fpr"`` or ``"accuracy"``; selects the threshold.
            quantize: Try to load the models in 8-bit via bitsandbytes; falls
                back to ``use_bfloat16``'s dtype when bitsandbytes is missing.

        Raises:
            ValueError: If ``mode`` is not one of the supported values.
        """
        assert_tokenizer_consistency(observer_name_or_path, performer_name_or_path)
        # Validate the mode before the (expensive) model downloads start.
        self.change_mode(mode)

        # Load models with memory-efficient settings
        model_kwargs = {
            "device_map": "auto",
            "trust_remote_code": True,
            "token": huggingface_config["TOKEN"],
        }
        full_precision_dtype = torch.bfloat16 if use_bfloat16 else torch.float32
        if quantize and self._bitsandbytes_available():
            # Passing ``load_in_8bit`` directly to ``from_pretrained`` is
            # deprecated; the quantization config object is the supported form.
            model_kwargs["quantization_config"] = transformers.BitsAndBytesConfig(load_in_8bit=True)
        else:
            if quantize:
                print("bitsandbytes not available. Falling back to full precision.")
            model_kwargs["torch_dtype"] = full_precision_dtype

        self.observer_model = AutoModelForCausalLM.from_pretrained(observer_name_or_path, **model_kwargs)
        self.performer_model = AutoModelForCausalLM.from_pretrained(performer_name_or_path, **model_kwargs)
        self.observer_model.eval()
        self.performer_model.eval()

        # Use the same access token as for the models so gated/private
        # repositories resolve for the tokenizer as well.
        self.tokenizer = AutoTokenizer.from_pretrained(
            observer_name_or_path, token=huggingface_config["TOKEN"])
        if not self.tokenizer.pad_token:
            # Batched padding needs a pad token; reuse EOS when none is set.
            self.tokenizer.pad_token = self.tokenizer.eos_token
        self.max_token_observed = max_token_observed

    @staticmethod
    def _bitsandbytes_available() -> bool:
        """Return True when the optional ``bitsandbytes`` package imports."""
        try:
            import bitsandbytes  # noqa: F401  (availability probe only)
        except ImportError:
            return False
        return True

    def change_mode(self, mode: str) -> None:
        """Select the decision threshold.

        Args:
            mode: ``"low-fpr"`` or ``"accuracy"``.

        Raises:
            ValueError: For any other value.
        """
        if mode == "low-fpr":
            self.threshold = BINOCULARS_FPR_THRESHOLD
        elif mode == "accuracy":
            self.threshold = BINOCULARS_ACCURACY_THRESHOLD
        else:
            raise ValueError(f"Invalid mode: {mode}")

    def _tokenize(self, batch: list[str]) -> "transformers.BatchEncoding":
        """Tokenize ``batch`` into PyTorch tensors, truncated to ``max_token_observed``.

        Padding to the longest item is only requested for batches with more
        than one text.
        """
        batch_size = len(batch)
        encodings = self.tokenizer(
            batch,
            return_tensors="pt",
            padding="longest" if batch_size > 1 else False,
            truncation=True,
            max_length=self.max_token_observed,
            return_token_type_ids=False)
        return encodings

    @staticmethod
    def _forward(model, encodings: "transformers.BatchEncoding") -> torch.Tensor:
        """Run ``model`` on ``encodings`` placed on the model's device; return logits."""
        inputs = dict(encodings.items())
        device = getattr(model, "device", None)
        # With ``device_map="auto"`` the weights may live on a GPU while the
        # tokenizer output is on CPU, so copy the inputs over. A "meta" device
        # means offloaded weights; placement is then left to the loader hooks.
        if device is not None and torch.device(device).type != "meta":
            inputs = {key: value.to(device) for key, value in inputs.items()}
        return model(**inputs).logits

    @torch.inference_mode()
    def _get_logits(self, encodings: "transformers.BatchEncoding") -> tuple[torch.Tensor, torch.Tensor]:
        """Return ``(observer_logits, performer_logits)`` for the same inputs."""
        observer_logits = self._forward(self.observer_model, encodings)
        performer_logits = self._forward(self.performer_model, encodings)
        return observer_logits, performer_logits

    def compute_score(self, input_text: Union[list[str], str]) -> Union[float, list[float]]:
        """Compute the Binoculars score for one text or a batch of texts.

        Args:
            input_text: A single string, or a list of strings.

        Returns:
            A float for a string input, otherwise a list with one score per
            text. An empty list yields an empty list.
        """
        is_single = isinstance(input_text, str)
        batch = [input_text] if is_single else input_text
        if not is_single and len(batch) == 0:
            # Nothing to score; avoid handing an empty batch to the tokenizer.
            return []

        encodings = self._tokenize(batch)
        observer_logits, performer_logits = self._get_logits(encodings)

        # The two models may sit on different devices; bring everything the
        # metric helpers consume onto the observer logits' device.
        target_device = observer_logits.device
        performer_logits = performer_logits.to(target_device)
        encodings = encodings.to(target_device)

        ppl = perplexity(encodings, performer_logits)
        x_ppl = entropy(observer_logits, performer_logits, encodings, self.tokenizer.pad_token_id)
        binoculars_scores = (ppl / x_ppl).tolist()
        return binoculars_scores[0] if is_single else binoculars_scores

    def _label(self, score: float) -> str:
        """Map one score to its user-facing label using the active threshold."""
        return self._AI_LABEL if score < self.threshold else self._HUMAN_LABEL

    def predict(self, input_text: Union[list[str], str]) -> Union[str, list[str]]:
        """Label one text or a batch of texts as AI- or human-generated.

        Args:
            input_text: A single string, or a list of strings.

        Returns:
            One label for a string input, otherwise a list of labels in input
            order. Scores strictly below ``self.threshold`` are labelled
            AI-generated.
        """
        scores = self.compute_score(input_text)
        if isinstance(input_text, str):
            return self._label(scores)
        return [self._label(score) for score in scores]