Binoculars

Paused

App Files Files Community

ahans30 committed on Feb 1

Commit

1008577

•

1 Parent(s): 45f9856

Added detection mode

Browse files

Files changed (2) hide show

binoculars/detector.py +24 -2
demo/demo.py +32 -11

binoculars/detector.py CHANGED Viewed

@@ -11,7 +11,10 @@ from .metrics import perplexity, entropy
 torch.set_grad_enabled(False)
-GLOBAL_BINOCULARS_THRESHOLD = 0.9015310749276843  # selected using Falcon-7B and Falcon-7B-Instruct at bfloat16
 DEVICE_1 = "cuda:0" if torch.cuda.is_available() else "cpu"
 DEVICE_2 = "cuda:1" if torch.cuda.device_count() > 1 else DEVICE_1
@@ -22,9 +25,17 @@ class Binoculars(object):
                  performer_name_or_path: str = "tiiuae/falcon-7b-instruct",
                  use_bfloat16: bool = True,
                  max_token_observed: int = 512,
                  ) -> None:
         assert_tokenizer_consistency(observer_name_or_path, performer_name_or_path)
         self.observer_model = AutoModelForCausalLM.from_pretrained(observer_name_or_path,
                                                                    device_map={"": DEVICE_1},
                                                                    trust_remote_code=True,
@@ -49,6 +60,14 @@ class Binoculars(object):
         self.max_token_observed = max_token_observed
     def _tokenize(self, batch: list[str]) -> transformers.BatchEncoding:
         batch_size = len(batch)
         encodings = self.tokenizer(
@@ -81,5 +100,8 @@ class Binoculars(object):
     def predict(self, input_text: Union[list[str], str]) -> Union[list[str], str]:
         binoculars_scores = np.array(self.compute_score(input_text))
-        pred = np.where(binoculars_scores < GLOBAL_BINOCULARS_THRESHOLD, "AI-Generated", "Human-Generated").tolist()
         return pred

 torch.set_grad_enabled(False)
+# selected using Falcon-7B and Falcon-7B-Instruct at bfloat16
+BINOCULARS_ACCURACY_THRESHOLD = 0.9015310749276843  # optimized for f1-score
+BINOCULARS_FPR_THRESHOLD = 0.8536432310785527  # optimized for low-fpr
 DEVICE_1 = "cuda:0" if torch.cuda.is_available() else "cpu"
 DEVICE_2 = "cuda:1" if torch.cuda.device_count() > 1 else DEVICE_1
                  performer_name_or_path: str = "tiiuae/falcon-7b-instruct",
                  use_bfloat16: bool = True,
                  max_token_observed: int = 512,
+                 mode: str = "low-fpr",
                  ) -> None:
         assert_tokenizer_consistency(observer_name_or_path, performer_name_or_path)
+        if mode == "low-fpr":
+            self.threshold = BINOCULARS_FPR_THRESHOLD
+        elif mode == "accuracy":
+            self.threshold = BINOCULARS_ACCURACY_THRESHOLD
+        else:
+            raise ValueError(f"Invalid mode: {mode}")
         self.observer_model = AutoModelForCausalLM.from_pretrained(observer_name_or_path,
                                                                    device_map={"": DEVICE_1},
                                                                    trust_remote_code=True,
         self.max_token_observed = max_token_observed
+    def change_mode(self, mode: str) -> None:
+        if mode == "low-fpr":
+            self.threshold = BINOCULARS_FPR_THRESHOLD
+        elif mode == "accuracy":
+            self.threshold = BINOCULARS_ACCURACY_THRESHOLD
+        else:
+            raise ValueError(f"Invalid mode: {mode}")
     def _tokenize(self, batch: list[str]) -> transformers.BatchEncoding:
         batch_size = len(batch)
         encodings = self.tokenizer(
     def predict(self, input_text: Union[list[str], str]) -> Union[list[str], str]:
+        """Label input text as likely AI- or human-generated.
+
+        Scores come from ``self.compute_score``; a score strictly below
+        ``self.threshold`` is labelled "Most likely AI-generated" and any
+        other score "Most likely human-generated".
+
+        Returns the labels via ``ndarray.tolist()``; presumably a list for a
+        list input and a single string for a single string, as the
+        annotation suggests -- confirm against ``compute_score``.
+        """
         binoculars_scores = np.array(self.compute_score(input_text))
+        pred = np.where(binoculars_scores < self.threshold,
+                        "Most likely AI-generated",
+                        "Most likely human-generated"
+                        ).tolist()
         return pred

demo/demo.py CHANGED Viewed

@@ -19,6 +19,16 @@ def run_detector(input_str):
     return f"{BINO.predict(input_str)}"
 # def load_set(progress=gr.Progress()):
 #     tokens = [None] * 24
 #     for count in progress.tqdm(tokens, desc="Counting Tokens..."):
@@ -41,6 +51,8 @@ css = """
 }
 """
 capybara_problem = '''Dr. Capy Cosmos, a capybara unlike any other, astounded the scientific community with his groundbreaking research in astrophysics. With his keen sense of observation and unparalleled ability to interpret cosmic data, he uncovered new insights into the mysteries of black holes and the origins of the universe. As he peered through telescopes with his large, round eyes, fellow researchers often remarked that it seemed as if the stars themselves whispered their secrets directly to him. Dr. Cosmos not only became a beacon of inspiration to aspiring scientists but also proved that intellect and innovation can be found in the most unexpected of creatures.'''
 with gr.Blocks(css=css,
@@ -60,10 +72,16 @@ with gr.Blocks(css=css,
     with gr.Row():
         input_box = gr.Textbox(value=capybara_problem, placeholder="Enter text here", lines=8, label="Input Text", )
     with gr.Row():
-        clear_button = gr.ClearButton()
         submit_button = gr.Button("Run Binoculars", variant="primary")
     with gr.Row():
-        output_text = gr.Textbox(label="Prediction", value="AI-Generated")
     with gr.Row():
         gr.HTML("<p><p><p>")
@@ -76,16 +94,18 @@ with gr.Blocks(css=css,
         gr.Markdown(
             """
             - `Accuracy` :
-                - AI-generated text detectors aim for accuracy, but achieving 100% is challenging.
-                - The provided prediction is for demo purposes only and should not be considered a consumer product.
                 - Users are advised to exercise discretion, and we assume no liability for any use.
-            - `Detection Use Cases` :
-                - In this work, our focus is to achieve an ultra-low false positive rate, crucial for sensitive downstream use case (e.g., avoiding false accusations in academic honesty cases).
-                - We find optimal application in content moderation, for example in detecting AI-generated reviews on platforms like Amazon, Google, Yelp, etc. This represents one of the most compelling and noteworthy use cases for Binoculars.
-            - `Human Supervision Advisory` :
-                - Strongly caution against using Binoculars (or any detector) without human supervision.
-            - `Performance by Language` :
-                - As noted in our paper, Binoculars exhibit superior detection performance in the English language compared to other languages.
             """
         )
@@ -109,3 +129,4 @@ with gr.Blocks(css=css,
     # clear_button.click(lambda x: input_box., )
     submit_button.click(run_detector, inputs=input_box, outputs=output_text)
     clear_button.click(lambda: ("", ""), outputs=[input_box, output_text])

     return f"{BINO.predict(input_str)}"
+def change_mode(mode):
+    if mode == "Low False Positive Rate":
+        BINO.change_mode("low-fpr")
+    elif mode == "High Accuracy":
+        BINO.change_mode("accuracy")
+    else:
+        gr.Error(f"Invalid mode selected.")
+    return mode
 # def load_set(progress=gr.Progress()):
 #     tokens = [None] * 24
 #     for count in progress.tqdm(tokens, desc="Counting Tokens..."):
 }
 """
+# Most likely human generated, #most likely AI written
 capybara_problem = '''Dr. Capy Cosmos, a capybara unlike any other, astounded the scientific community with his groundbreaking research in astrophysics. With his keen sense of observation and unparalleled ability to interpret cosmic data, he uncovered new insights into the mysteries of black holes and the origins of the universe. As he peered through telescopes with his large, round eyes, fellow researchers often remarked that it seemed as if the stars themselves whispered their secrets directly to him. Dr. Cosmos not only became a beacon of inspiration to aspiring scientists but also proved that intellect and innovation can be found in the most unexpected of creatures.'''
 with gr.Blocks(css=css,
     with gr.Row():
         input_box = gr.Textbox(value=capybara_problem, placeholder="Enter text here", lines=8, label="Input Text", )
     with gr.Row():
+        # dropdown option for mode
+        dropdown_mode = gr.Dropdown(["Low False Positive Rate", "High Accuracy"],
+                                    label="Mode",
+                                    show_label=True,
+                                    value="Low False Positive Rate"
+                                    )
         submit_button = gr.Button("Run Binoculars", variant="primary")
+        clear_button = gr.ClearButton()
     with gr.Row():
+        output_text = gr.Textbox(label="Prediction", value="Most likely AI-Generated")
     with gr.Row():
         gr.HTML("<p><p><p>")
         gr.Markdown(
             """
             - `Accuracy` :
+                - AI-generated text detectors aim for accuracy, but no detector is perfect.
+                - If you choose "high accuracy" mode, then the threshold between human and machine is chosen to maximize the F1 score on our validation dataset.
+                - If you choose the "low false-positive rate" mode, the threshold for declaring something to be AI generated will be set so that the false positive (human text wrongly flagged as AI) rate is below 0.01% on our validation set.
+                - The provided prediction is for demonstration purposes only. This is not offered as a consumer product.
                 - Users are advised to exercise discretion, and we assume no liability for any use.
+            - `Recommended detection Use Cases` :
+                - In this work, our focus is on achieving a low false positive rate, crucial for sensitive downstream use cases where false accusations are highly undesirable.
+                - The main focus of our research is on content moderation, e.g., detecting AI-generated reviews on Amazon/Yelp, detecting AI generated social media posts and news, etc. We feel this application space is most compelling, as LLM detection tools are best used by professionals in conjunction with a broader set of moderation tools and policies.
+            - `Known weaknesses` :
+                - As noted in our paper, Binoculars exhibits superior detection performance in the English language compared to other languages.  Non-English text makes it more likely that results will default to "human written."
+                - Binoculars considers verbatim memorized texts to be "AI generated." For example, most language models have memorized and can recite the US constitution. For this reason, text from the constitution, or other highly memorized sources, may be classified as AI written.
+                - We recommend using 200-300 words of text at a time. Fewer words make detection difficult, as can using more than 1000 words. Binoculars will be more likely to default to the "human written" category if too few tokens are provided.
             """
         )
     # clear_button.click(lambda x: input_box., )
     submit_button.click(run_detector, inputs=input_box, outputs=output_text)
     clear_button.click(lambda: ("", ""), outputs=[input_box, output_text])
+    dropdown_mode.change(change_mode, inputs=[dropdown_mode], outputs=[dropdown_mode])