anonymous-aardvark committed
Commit 85ecf02 (0 parents)

initial commit
.gitattributes ADDED
@@ -0,0 +1,34 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: Submission 2841 Demo
+ emoji: 📊
+ colorFrom: pink
+ colorTo: gray
+ sdk: gradio
+ sdk_version: 3.21.0
+ app_file: app.py
+ pinned: false
+ python_version: 3.10.6
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,46 @@
+ # coding=utf-8
+ # Copyright 2023 Anonymous Authors of "A Watermark for Large Language Models"
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from argparse import Namespace
+ args = Namespace()
+
+ arg_dict = {
+     'run_gradio': True,
+     'demo_public': False,
+     'model_name_or_path': 'bigscience/bloom',
+     'load_fp16': False,
+     'prompt_max_length': None,
+     'max_new_tokens': 200,
+     'generation_seed': 123,
+     'use_sampling': True,
+     'n_beams': 1,
+     'sampling_temp': 0.7,
+     'use_gpu': True,
+     'seeding_scheme': 'simple_1',
+     'gamma': 0.5,
+     'delta': 2.0,
+     'normalizers': '',
+     'ignore_repeated_bigrams': False,
+     'detection_z_threshold': 4.0,
+     'select_green_tokens': True,
+     'skip_model_load': True,
+     'seed_separately': True,
+ }
+
+ args.__dict__.update(arg_dict)
+
+ from demo_watermark import main
+
+ main(args)
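The configuration above pins `model_name_or_path` to `bigscience/bloom` and sets `skip_model_load: True`, so the Space generates through the hosted inference API rather than loading weights locally. The same entry point can also drive a locally loaded checkpoint; below is a minimal sketch of such a variant (the `facebook/opt-1.3b` checkpoint and the specific flag values are illustrative assumptions, not part of this commit):

```python
# Hypothetical local variant of app.py: load a small causal LM's weights
# instead of routing generation through the HF inference API.
from argparse import Namespace

from demo_watermark import main

args = Namespace()
args.__dict__.update({
    'run_gradio': True,
    'demo_public': False,
    'model_name_or_path': 'facebook/opt-1.3b',  # assumption: any HF causal LM works
    'load_fp16': False,
    'prompt_max_length': None,
    'max_new_tokens': 200,
    'generation_seed': 123,
    'use_sampling': True,
    'n_beams': 1,
    'sampling_temp': 0.7,
    'use_gpu': True,
    'seeding_scheme': 'simple_1',
    'gamma': 0.25,
    'delta': 2.0,
    'normalizers': '',
    'ignore_repeated_bigrams': False,
    'detection_z_threshold': 4.0,
    'select_green_tokens': True,
    'skip_model_load': False,  # actually load the model weights
    'seed_separately': True,
})

main(args)
```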
demo_watermark.py ADDED
@@ -0,0 +1,914 @@
+ # coding=utf-8
+ # Copyright 2023 Anonymous Authors of "A Watermark for Large Language Models"
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import os
+ import argparse
+ from pprint import pprint
+ from functools import partial
+
+ import numpy  # for gradio hot reload
+ import gradio as gr
+
+ import torch
+
+ from transformers import (AutoTokenizer,
+                           AutoModelForSeq2SeqLM,
+                           AutoModelForCausalLM,
+                           LogitsProcessorList)
+
+ from transformers import GPT2TokenizerFast
+ OPT_TOKENIZER = GPT2TokenizerFast
+
+ from watermark_processor import WatermarkLogitsProcessor, WatermarkDetector
+
+ API_MODEL_MAP = {
+     "bigscience/bloom": {"max_length": 1000, "gamma": 0.5, "delta": 2.0},
+     "bigscience/bloomz": {"max_length": 1000, "gamma": 0.5, "delta": 2.0},
+     "google/flan-ul2": {"max_length": 1000, "gamma": 0.5, "delta": 2.0},
+     "google/flan-t5-xxl": {"max_length": 1000, "gamma": 0.5, "delta": 2.0},
+     "EleutherAI/gpt-neox-20b": {"max_length": 1000, "gamma": 0.5, "delta": 2.0},
+ }
+
+ def str2bool(v):
+     """Util function for user-friendly boolean flag args"""
+     if isinstance(v, bool):
+         return v
+     if v.lower() in ('yes', 'true', 't', 'y', '1'):
+         return True
+     elif v.lower() in ('no', 'false', 'f', 'n', '0'):
+         return False
+     else:
+         raise argparse.ArgumentTypeError('Boolean value expected.')
+
+ def parse_args():
+     """Command line argument specification"""
+
+     parser = argparse.ArgumentParser(description="A minimum working example of applying the watermark to any LLM that supports the huggingface 🤗 `generate` API")
+
+     parser.add_argument(
+         "--run_gradio",
+         type=str2bool,
+         default=True,
+         help="Whether to launch as a gradio demo. Set to False if gradio is not installed and you just want the stdout version.",
+     )
+     parser.add_argument(
+         "--demo_public",
+         type=str2bool,
+         default=False,
+         help="Whether to expose the gradio demo to the internet.",
+     )
+     parser.add_argument(
+         "--model_name_or_path",
+         type=str,
+         default="facebook/opt-6.7b",
+         help="Main model, path to pretrained model or model identifier from huggingface.co/models.",
+     )
+     parser.add_argument(
+         "--prompt_max_length",
+         type=int,
+         default=None,
+         help="Truncation length for the prompt, overrides the model config's max length field.",
+     )
+     parser.add_argument(
+         "--max_new_tokens",
+         type=int,
+         default=200,
+         help="Maximum number of new tokens to generate.",
+     )
+     parser.add_argument(
+         "--generation_seed",
+         type=int,
+         default=123,
+         help="Seed for setting the torch global rng prior to generation.",
+     )
+     parser.add_argument(
+         "--use_sampling",
+         type=str2bool,
+         default=True,
+         help="Whether to generate using multinomial sampling.",
+     )
+     parser.add_argument(
+         "--sampling_temp",
+         type=float,
+         default=0.7,
+         help="Sampling temperature to use when generating using multinomial sampling.",
+     )
+     parser.add_argument(
+         "--n_beams",
+         type=int,
+         default=1,
+         help="Number of beams to use for beam search. 1 is normal greedy decoding.",
+     )
+     parser.add_argument(
+         "--use_gpu",
+         type=str2bool,
+         default=True,
+         help="Whether to run inference and watermark hashing/seeding/permutation on gpu.",
+     )
+     parser.add_argument(
+         "--seeding_scheme",
+         type=str,
+         default="simple_1",
+         help="Seeding scheme to use to generate the greenlists at each generation and verification step.",
+     )
+     parser.add_argument(
+         "--gamma",
+         type=float,
+         default=0.25,
+         help="The fraction of the vocabulary to partition into the greenlist at each generation and verification step.",
+     )
+     parser.add_argument(
+         "--delta",
+         type=float,
+         default=2.0,
+         help="The amount/bias to add to each of the greenlist token logits before each token sampling step.",
+     )
+     parser.add_argument(
+         "--normalizers",
+         type=str,
+         default="",
+         help="Single or comma separated list of the preprocessor/normalizer names to use when performing watermark detection.",
+     )
+     parser.add_argument(
+         "--ignore_repeated_bigrams",
+         type=str2bool,
+         default=False,
+         help="Whether to use the detection method that only counts each unique bigram once as either a green or red hit.",
+     )
+     parser.add_argument(
+         "--detection_z_threshold",
+         type=float,
+         default=4.0,
+         help="The test statistic threshold for the detection hypothesis test.",
+     )
+     parser.add_argument(
+         "--select_green_tokens",
+         type=str2bool,
+         default=True,
+         help="How to treat the permutation when selecting the greenlist tokens at each step. Legacy behavior (False) picks the complement/red tokens first.",
+     )
+     parser.add_argument(
+         "--skip_model_load",
+         type=str2bool,
+         default=False,
+         help="Skip the model loading to debug the interface.",
+     )
+     parser.add_argument(
+         "--seed_separately",
+         type=str2bool,
+         default=True,
+         help="Whether to call the torch seed function before both the unwatermarked and watermarked generate calls.",
+     )
+     parser.add_argument(
+         "--load_fp16",
+         type=str2bool,
+         default=False,
+         help="Whether to run the model in float16 precision.",
+     )
+     args = parser.parse_args()
+     return args
+
+ def load_model(args):
+     """Load and return the model and tokenizer"""
+
+     args.is_seq2seq_model = any([(model_type in args.model_name_or_path) for model_type in ["t5", "T0"]])
+     args.is_decoder_only_model = any([(model_type in args.model_name_or_path) for model_type in ["gpt", "opt", "bloom"]])
+     if args.is_seq2seq_model:
+         model = AutoModelForSeq2SeqLM.from_pretrained(args.model_name_or_path)
+     elif args.is_decoder_only_model:
+         if args.load_fp16:
+             model = AutoModelForCausalLM.from_pretrained(args.model_name_or_path, torch_dtype=torch.float16, device_map='auto')
+         else:
+             model = AutoModelForCausalLM.from_pretrained(args.model_name_or_path)
+     else:
+         raise ValueError(f"Unknown model type: {args.model_name_or_path}")
+
+     if args.use_gpu:
+         device = "cuda" if torch.cuda.is_available() else "cpu"
+         if args.load_fp16:
+             pass  # model was already placed on device(s) via device_map='auto'
+         else:
+             model = model.to(device)
+     else:
+         device = "cpu"
+     model.eval()
+
+     tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)
+
+     return model, tokenizer, device
+
+
+ from text_generation import InferenceAPIClient
+ from requests.exceptions import ReadTimeout
+ def generate_with_api(prompt, args):
+     hf_api_key = os.environ.get("HF_API_KEY")
+     if hf_api_key is None:
+         raise ValueError("HF_API_KEY environment variable not set, cannot use HF API to generate text.")
+
+     client = InferenceAPIClient(args.model_name_or_path, token=hf_api_key, timeout=60)
+
+     assert args.n_beams == 1, "HF API models do not support beam search."
+     generation_params = {
+         "max_new_tokens": args.max_new_tokens,
+         "do_sample": args.use_sampling,
+     }
+     if args.use_sampling:
+         generation_params["temperature"] = args.sampling_temp
+         generation_params["seed"] = args.generation_seed
+
+     timeout_msg = "[Model API timeout error. Try reducing the max_new_tokens parameter or the prompt length.]"
+     try:
+         generation_params["watermark"] = False
+         without_watermark_iterator = client.generate_stream(prompt, **generation_params)
+     except ReadTimeout as e:
+         print(e)
+         without_watermark_iterator = (char for char in timeout_msg)
+     try:
+         generation_params["watermark"] = True
+         with_watermark_iterator = client.generate_stream(prompt, **generation_params)
+     except ReadTimeout as e:
+         print(e)
+         with_watermark_iterator = (char for char in timeout_msg)
+
+     all_without_words, all_with_words = "", ""
+     for without_word, with_word in zip(without_watermark_iterator, with_watermark_iterator):
+         all_without_words += without_word.token.text
+         all_with_words += with_word.token.text
+         yield all_without_words, all_with_words
+
+
+ def check_prompt(prompt, args, tokenizer, model=None, device=None):
+
+     # This applies to both the local and API model scenarios
+     if args.model_name_or_path in API_MODEL_MAP:
+         args.prompt_max_length = API_MODEL_MAP[args.model_name_or_path]["max_length"]
+     elif hasattr(model.config, "max_position_embeddings"):
+         args.prompt_max_length = model.config.max_position_embeddings - args.max_new_tokens
+     else:
+         args.prompt_max_length = 2048 - args.max_new_tokens
+
+     tokd_input = tokenizer(prompt, return_tensors="pt", add_special_tokens=True, truncation=True, max_length=args.prompt_max_length).to(device)
+     truncation_warning = True if tokd_input["input_ids"].shape[-1] == args.prompt_max_length else False
+     redecoded_input = tokenizer.batch_decode(tokd_input["input_ids"], skip_special_tokens=True)[0]
+
+     return (redecoded_input,
+             int(truncation_warning),
+             args)
+
+
+
+ def generate(prompt, args, tokenizer, model=None, device=None):
+     """Instantiate the WatermarkLogitsProcessor according to the watermark parameters
+     and generate watermarked text by passing it to the generate method of the model
+     as a logits processor."""
+
+     print(f"Generating with {args}")
+     print(f"Prompt: {prompt}")
+
+     if args.model_name_or_path in API_MODEL_MAP:
+         api_outputs = generate_with_api(prompt, args)
+         yield from api_outputs
+     else:
+         tokd_input = tokenizer(prompt, return_tensors="pt", add_special_tokens=True, truncation=True, max_length=args.prompt_max_length).to(device)
+
+         watermark_processor = WatermarkLogitsProcessor(vocab=list(tokenizer.get_vocab().values()),
+                                                        gamma=args.gamma,
+                                                        delta=args.delta,
+                                                        seeding_scheme=args.seeding_scheme,
+                                                        select_green_tokens=args.select_green_tokens)
+
+         gen_kwargs = dict(max_new_tokens=args.max_new_tokens)
+
+         if args.use_sampling:
+             gen_kwargs.update(dict(
+                 do_sample=True,
+                 top_k=0,
+                 temperature=args.sampling_temp
+             ))
+         else:
+             gen_kwargs.update(dict(
+                 num_beams=args.n_beams
+             ))
+
+         generate_without_watermark = partial(
+             model.generate,
+             **gen_kwargs
+         )
+         generate_with_watermark = partial(
+             model.generate,
+             logits_processor=LogitsProcessorList([watermark_processor]),
+             **gen_kwargs
+         )
+
+         torch.manual_seed(args.generation_seed)
+         output_without_watermark = generate_without_watermark(**tokd_input)
+
+         # optional to seed before the second generation, but the outputs will generally not match anyway, unless delta==0.0 (a no-op watermark)
+         if args.seed_separately:
+             torch.manual_seed(args.generation_seed)
+         output_with_watermark = generate_with_watermark(**tokd_input)
+
+         if args.is_decoder_only_model:
+             # need to isolate the newly generated tokens
+             output_without_watermark = output_without_watermark[:, tokd_input["input_ids"].shape[-1]:]
+             output_with_watermark = output_with_watermark[:, tokd_input["input_ids"].shape[-1]:]
+
+         decoded_output_without_watermark = tokenizer.batch_decode(output_without_watermark, skip_special_tokens=True)[0]
+         decoded_output_with_watermark = tokenizer.batch_decode(output_with_watermark, skip_special_tokens=True)[0]
+
+         # mock the API outputs in a whitespace-split generator style
+         all_without_words, all_with_words = "", ""
+         for without_word, with_word in zip(decoded_output_without_watermark.split(), decoded_output_with_watermark.split()):
+             all_without_words += without_word + " "
+             all_with_words += with_word + " "
+             yield all_without_words, all_with_words
+
+
+ def format_names(s):
+     """Format names for the gradio demo interface"""
+     s = s.replace("num_tokens_scored", "Tokens Counted (T)")
+     s = s.replace("num_green_tokens", "# Tokens in Greenlist")
+     s = s.replace("green_fraction", "Fraction of T in Greenlist")
+     s = s.replace("z_score", "z-score")
+     s = s.replace("p_value", "p value")
+     s = s.replace("prediction", "Prediction")
+     s = s.replace("confidence", "Confidence")
+     return s
+
+ def list_format_scores(score_dict, detection_threshold):
+     """Format the detection metrics into a gradio dataframe input format"""
+     lst_2d = []
+     for k, v in score_dict.items():
+         if k == 'green_fraction':
+             lst_2d.append([format_names(k), f"{v:.1%}"])
+         elif k == 'confidence':
+             lst_2d.append([format_names(k), f"{v:.3%}"])
+         elif isinstance(v, float):
+             lst_2d.append([format_names(k), f"{v:.3g}"])
+         elif isinstance(v, bool):
+             lst_2d.append([format_names(k), ("Watermarked" if v else "Human/Unwatermarked")])
+         else:
+             lst_2d.append([format_names(k), f"{v}"])
+     if "confidence" in score_dict:
+         lst_2d.insert(-2, ["z-score Threshold", f"{detection_threshold}"])
+     else:
+         lst_2d.insert(-1, ["z-score Threshold", f"{detection_threshold}"])
+     return lst_2d
+
+ def detect(input_text, args, tokenizer, device=None, return_green_token_mask=True):
+     """Instantiate the WatermarkDetector object and call detect on
+     the input text, returning the scores and outcome of the test"""
+
+     print(f"Detecting with {args}")
+     print(f"Detection Tokenizer: {type(tokenizer)}")
+
+     watermark_detector = WatermarkDetector(vocab=list(tokenizer.get_vocab().values()),
+                                            gamma=args.gamma,
+                                            seeding_scheme=args.seeding_scheme,
+                                            device=device,
+                                            tokenizer=tokenizer,
+                                            z_threshold=args.detection_z_threshold,
+                                            normalizers=args.normalizers,
+                                            ignore_repeated_bigrams=args.ignore_repeated_bigrams,
+                                            select_green_tokens=args.select_green_tokens)
+     # for now, just don't display the green token mask
+     # if we're using normalizers or ignore_repeated_bigrams
+     if args.normalizers != [] or args.ignore_repeated_bigrams:
+         return_green_token_mask = False
+
+     error = False
+     green_token_mask = None
+     if input_text == "":
+         error = True
+     else:
+         try:
+             score_dict = watermark_detector.detect(input_text, return_green_token_mask=return_green_token_mask)
+             green_token_mask = score_dict.pop("green_token_mask", None)
+             output = list_format_scores(score_dict, watermark_detector.z_threshold)
+         except ValueError as e:
+             print(e)
+             error = True
+     if error:
+         output = [["Error", "string too short to compute metrics"]]
+         output += [["", ""] for _ in range(6)]
+
+     html_output = "[No highlight markup generated]"
+     if green_token_mask is not None:
+         # workaround: we need a fast tokenizer with charspan support
+         if "opt" in args.model_name_or_path:
+             tokenizer = OPT_TOKENIZER.from_pretrained(args.model_name_or_path)
+
+         tokens = tokenizer(input_text)
+         if tokens["input_ids"][0] == tokenizer.bos_token_id:
+             tokens["input_ids"] = tokens["input_ids"][1:]  # ignore attention mask
+         skip = watermark_detector.min_prefix_len
+         charspans = [tokens.token_to_chars(i) for i in range(skip, len(tokens["input_ids"]))]
+         charspans = [cs for cs in charspans if cs is not None]  # remove the special token spans
+
+         assert len(charspans) == len(green_token_mask)
+
+         tags = [(f'<span class="green">{input_text[cs.start:cs.end]}</span>' if m else f'<span class="red">{input_text[cs.start:cs.end]}</span>') for cs, m in zip(charspans, green_token_mask)]
+         html_output = f'<p>{" ".join(tags)}</p>'
+
+     return output, args, tokenizer, html_output
+
+ def run_gradio(args, model=None, device=None, tokenizer=None):
+     """Define and launch the gradio demo interface"""
+     check_prompt_partial = partial(check_prompt, model=model, device=device)
+     generate_partial = partial(generate, model=model, device=device)
+     detect_partial = partial(detect, device=device)
+
+
+     css = """
+     .green { color: black!important;line-height:1.9em; padding: 0.2em 0.2em; background: #ccffcc; border-radius:0.5rem;}
+     .red { color: black!important;line-height:1.9em; padding: 0.2em 0.2em; background: #ffad99; border-radius:0.5rem;}
+     """
+
+     with gr.Blocks(css=css) as demo:
+         # Top section, greeting and instructions
+         with gr.Row():
+             with gr.Column(scale=9):
+                 gr.Markdown(
+                     """
+                     ## 💧 A Watermark for Large Language Models 🔍
+
+                     Demo made possible by the HuggingFace 🤗 [text-generation-inference](https://github.com/huggingface/text-generation-inference) serving framework.
+                     """
+                 )
+             with gr.Column(scale=1):
+                 # if the model_name_or_path at startup is not one of the API models, add it to the dropdown
+                 all_models = sorted(list(set(list(API_MODEL_MAP.keys()) + [args.model_name_or_path])))
+                 model_selector = gr.Dropdown(
+                     all_models,
+                     value=args.model_name_or_path,
+                     label="Language Model",
+                 )
+
+         # Construct state for parameters, define updates and toggles
+         default_prompt = args.__dict__.pop("default_prompt")
+         session_args = gr.State(value=args)
+         # note: the State object automatically calls `value` if it's callable; wrap the tokenizer in a lambda to avoid calling it at startup
+         session_tokenizer = gr.State(value=lambda: tokenizer)
+
+         with gr.Tab("Welcome"):
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     gr.Markdown(
+                         """
+                         Potential harms of large language models can be mitigated by *watermarking* a model's output.
+                         *Watermarks* are embedded signals in the generated text that are invisible to humans but algorithmically
+                         detectable, allowing *anyone* to later check whether a given span of text
+                         was likely to have been generated by a model that uses the watermark.
+
+                         This space showcases a watermarking approach that can be applied to _any_ generative language model.
+                         For demonstration purposes, the space demos a selection of multi-billion parameter models (see the following note for caveats).
+                         """
+                     )
+                     with gr.Accordion("A note on the available models:", open=False):
+                         gr.Markdown(
+                             """
+                             This demo uses open-source language models. Today, these models are less powerful than proprietary commercial tools like ChatGPT, Claude, Bard, or Bing/Sydney.
+
+                             Models like [BLOOM (175B)](https://huggingface.co/bigscience/bloom) are designed to "complete" your prompt, and are not fine-tuned to follow instructions.
+                             For best results, prompt that model with a few sentences that form the beginning of a paragraph, and then allow it to "continue" your paragraph.
+                             Some examples include the opening paragraph of a Wikipedia article, or the first few sentences of a story.
+                             Longer prompts that end mid-sentence will result in more fluent generations.
+
+                             Some of the models available in this demo are fine-tuned to follow instructions but have different strengths and will showcase different
+                             types of watermark behavior. [BLOOMZ](https://huggingface.co/bigscience/bloomz) is an instruction-tuned variant of BLOOM capable of following instructions in dozens of languages zero-shot,
+                             and can generate long and coherent paragraphs and stories given the right prompt.
+                             The FLAN models [FLAN-t5-xxl (11B)](https://huggingface.co/google/flan-t5-xxl) and [FLAN-UL2 (20B)](https://huggingface.co/google/flan-ul2) are fine-tuned on a variety of in-context few-shot learning NLP tasks,
+                             such as reasoning and question answering.
+
+                             Generally, short, low-entropy scenarios, where the model has very few choices of correct/suitable responses to the prompt,
+                             will not exhibit as strong a watermark presence, while longer watermarked outputs will produce higher detection statistics.
+                             """
+                         )
+                     gr.Markdown(
+                         """
+                         **[Generate & Detect]**: The first tab shows that the watermark can be embedded with
+                         negligible impact on text quality. You can try any prompt and compare the quality of
+                         normal text (*Output Without Watermark*) to the watermarked text (*Output With Watermark*) below it.
+                         You can also "see" the watermark by looking at the **Highlighted** tab, where the tokens are
+                         colored green or red depending on which list they are in.
+                         Metrics on the right show that the watermark can be reliably detected given a reasonably small number of tokens (25-50).
+                         Detection is very efficient and does not use the language model or its parameters.
+
+                         **[Detector Only]**: You can also copy-paste the watermarked text (or any other text)
+                         into the second tab. This can be used to see how many sentences you could remove and still detect the watermark.
+                         You can also verify here that the detection has, by design, a low false-positive rate;
+                         this means that human-generated text that you copy into this detector will not be marked as machine-generated.
+                         """
+                     )
+
+                 with gr.Column(scale=1):
+                     gr.Markdown(
+                         """
+                         ![](https://drive.google.com/uc?export=view&id=1yVLPcjm-xvaCjQyc3FGLsWIU84v1QRoC)
+                         """
+                     )
+
+         with gr.Tab("Generate & Detect"):
+
+             with gr.Row():
+                 prompt = gr.Textbox(label="Prompt", interactive=True, lines=10, max_lines=10, value=default_prompt)
+             with gr.Row():
+                 generate_btn = gr.Button("Generate")
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     with gr.Tab("Output Without Watermark (Raw Text)"):
+                         output_without_watermark = gr.Textbox(interactive=False, lines=14, max_lines=14)
+                     with gr.Tab("Highlighted"):
+                         html_without_watermark = gr.HTML(elem_id="html-without-watermark")
+                 with gr.Column(scale=1):
+                     without_watermark_detection_result = gr.Dataframe(headers=["Metric", "Value"], interactive=False, row_count=7, col_count=2)
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     with gr.Tab("Output With Watermark (Raw Text)"):
+                         output_with_watermark = gr.Textbox(interactive=False, lines=14, max_lines=14)
+                     with gr.Tab("Highlighted"):
+                         html_with_watermark = gr.HTML(elem_id="html-with-watermark")
+                 with gr.Column(scale=1):
+                     with_watermark_detection_result = gr.Dataframe(headers=["Metric", "Value"], interactive=False, row_count=7, col_count=2)
+
+             redecoded_input = gr.Textbox(visible=False)
+             truncation_warning = gr.Number(visible=False)
+             def truncate_prompt(redecoded_input, truncation_warning, orig_prompt, args):
+                 if truncation_warning:
+                     return redecoded_input + "\n\n[Prompt was truncated before generation due to length...]", args
+                 else:
+                     return orig_prompt, args
+
+         with gr.Tab("Detector Only"):
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     with gr.Tab("Text to Analyze"):
+                         detection_input = gr.Textbox(interactive=True, lines=14, max_lines=14)
+                     with gr.Tab("Highlighted"):
+                         html_detection_input = gr.HTML(elem_id="html-detection-input")
+                 with gr.Column(scale=1):
+                     detection_result = gr.Dataframe(headers=["Metric", "Value"], interactive=False, row_count=7, col_count=2)
+             with gr.Row():
+                 detect_btn = gr.Button("Detect")
+
+         # Parameter selection group
+         with gr.Accordion("Advanced Settings", open=False):
+             with gr.Row():
+                 with gr.Column(scale=1):
+                     gr.Markdown("#### Generation Parameters")
+                     with gr.Row():
+                         decoding = gr.Radio(label="Decoding Method", choices=["multinomial", "greedy"], value=("multinomial" if args.use_sampling else "greedy"))
+                     with gr.Row():
+                         sampling_temp = gr.Slider(label="Sampling Temperature", minimum=0.1, maximum=1.0, step=0.1, value=args.sampling_temp, visible=True)
+                     with gr.Row():
+                         generation_seed = gr.Number(label="Generation Seed", value=args.generation_seed, interactive=True)
+                     with gr.Row():
+                         n_beams = gr.Dropdown(label="Number of Beams", choices=list(range(1, 11, 1)), value=args.n_beams, visible=((not args.use_sampling) and (args.model_name_or_path not in API_MODEL_MAP)))
+                     with gr.Row():
+                         max_new_tokens = gr.Slider(label="Max Generated Tokens", minimum=10, maximum=1000, step=10, value=args.max_new_tokens)
+
+                 with gr.Column(scale=1):
+                     gr.Markdown("#### Watermark Parameters")
+                     with gr.Row():
+                         gamma = gr.Slider(label="gamma", minimum=0.1, maximum=0.9, step=0.05, value=args.gamma)
+                     with gr.Row():
+                         delta = gr.Slider(label="delta", minimum=0.0, maximum=10.0, step=0.1, value=args.delta)
+                     gr.Markdown("#### Detector Parameters")
+                     with gr.Row():
+                         detection_z_threshold = gr.Slider(label="z-score threshold", minimum=0.0, maximum=10.0, step=0.1, value=args.detection_z_threshold)
+                     with gr.Row():
+                         ignore_repeated_bigrams = gr.Checkbox(label="Ignore Bigram Repeats")
+                     with gr.Row():
+                         normalizers = gr.CheckboxGroup(label="Normalizations", choices=["unicode", "homoglyphs", "truecase"], value=args.normalizers)
+             with gr.Row():
+                 gr.Markdown("_Note: sliders don't always update perfectly. Clicking on the bar or using the number window to the right can help. The window below shows the current settings._")
+             with gr.Row():
+                 current_parameters = gr.Textbox(label="Current Parameters", value=args)
+             with gr.Accordion("Legacy Settings", open=False):
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         seed_separately = gr.Checkbox(label="Seed both generations separately", value=args.seed_separately)
+                     with gr.Column(scale=1):
+                         select_green_tokens = gr.Checkbox(label="Select 'greenlist' from partition", value=args.select_green_tokens)
+
+
+         with gr.Accordion("What do the settings do?", open=False):
+             gr.Markdown(
+                 """
+                 #### Generation Parameters:
+
+                 - **Decoding Method** : We can generate tokens from the model using either multinomial sampling or greedy decoding.
+                 - **Sampling Temperature** : If using multinomial sampling we can set the temperature of the sampling distribution.
+                 0.0 is equivalent to greedy decoding, and 1.0 is the maximum amount of variability/entropy in the next token distribution.
+                 0.7 strikes a nice balance between faithfulness to the model's estimate of top candidates while adding variety. Does not apply to greedy decoding.
+                 - **Generation Seed** : The integer to pass to the torch random number generator before running generation. Makes the multinomial sampling strategy's
+                 outputs reproducible. Does not apply to greedy decoding.
+                 - **Number of Beams** : When using greedy decoding, we can also set the number of beams to > 1 to enable beam search.
+                 This is not implemented/excluded from the paper for multinomial sampling, but may be added in the future.
+                 - **Max Generated Tokens** : The `max_new_tokens` parameter passed to the generation method to stop the output at a certain number of new tokens.
+                 Note that the model is free to generate fewer tokens depending on the prompt.
+                 Implicitly this sets the maximum number of prompt tokens possible as the model's maximum input length minus `max_new_tokens`,
+                 and inputs will be truncated accordingly.
+
+                 #### Watermark Parameters:
+
+                 - **gamma** : The fraction of the vocabulary to be partitioned into the greenlist at each generation step.
+                 Smaller gamma values create a stronger watermark by enabling the watermarked model to achieve
+                 greater differentiation from human/unwatermarked text, because it is preferentially sampling
+                 from a smaller green set, making those tokens less likely to occur by chance.
+                 - **delta** : The amount of positive bias to add to the logits of every token in the greenlist
+                 at each generation step before sampling/choosing the next token. Higher delta values
+                 mean that the greenlist tokens are more heavily preferred by the watermarked model,
+                 and as the bias becomes very large the watermark transitions from "soft" to "hard".
+                 For a hard watermark, nearly all tokens are green, but this can have a detrimental effect on
+                 generation quality, especially when there is not a lot of flexibility in the distribution.
+
+                 #### Detector Parameters:
+
+                 - **z-score threshold** : the z-score cutoff for the hypothesis test. Higher thresholds (such as 4.0) make
+                 _false positives_ (predicting that human/unwatermarked text is watermarked) very unlikely,
+                 as genuine human text with a significant number of tokens will almost never achieve
+                 that high of a z-score. Lower thresholds will capture more _true positives_, as some watermarked
+                 texts will contain fewer green tokens and achieve a lower z-score, yet still pass the lower bar and
+                 be flagged as "watermarked". However, a lower threshold will increase the chance that human text
+                 that contains a slightly higher than average number of green tokens is erroneously flagged.
+                 4.0-5.0 offers extremely low false positive rates while still accurately catching most watermarked text.
+                 - **Ignore Bigram Repeats** : This alternate detection algorithm only considers the unique bigrams in the text during detection,
+                 computing the greenlists based on the first token in each pair and checking whether the second falls within the list.
+                 This means that `T` is now the number of unique bigrams in the text, which becomes less than the total
+                 number of tokens generated if the text contains a lot of repetition. See the paper for a more detailed discussion.
+                 - **Normalizations** : we implement a few basic normalizations to defend against various adversarial perturbations of the
+                 text analyzed during detection. Currently we support converting all characters to unicode,
+                 replacing homoglyphs with a canonical form, and standardizing the capitalization.
+                 See the paper for a detailed discussion of input normalization.
+                 """
+             )
+
+         with gr.Accordion("What do the output metrics mean?", open=False):
+             gr.Markdown(
+                 """
+                 - `z-score threshold` : The cutoff for the hypothesis test
+                 - `Tokens Counted (T)` : The number of tokens in the output that were counted by the detection algorithm.
+                 The first token is omitted in the simple, single-token seeding scheme, since there is no way to generate
+                 a greenlist for it as it has no prefix token(s). Under the "Ignore Bigram Repeats" detection algorithm,
+                 described in the bottom panel, this can be much less than the total number of tokens generated if there is a lot of repetition.
+                 - `# Tokens in Greenlist` : The number of tokens that were observed to fall in their respective greenlist
+                 - `Fraction of T in Greenlist` : `# Tokens in Greenlist` / `T`. This is expected to be approximately `gamma` for human/unwatermarked text.
+                 - `z-score` : The test statistic for the detection hypothesis test. If larger than the `z-score threshold`,
+                 we "reject the null hypothesis" that the text is human/unwatermarked, and conclude it is watermarked.
+                 - `p value` : The likelihood of observing the computed `z-score` under the null hypothesis. This is the likelihood of
+                 observing the `Fraction of T in Greenlist` given that the text was generated without knowledge of the watermark procedure/greenlists.
+                 If this is extremely _small_, we are confident that this many green tokens was not chosen by random chance.
+                 - `prediction` : The outcome of the hypothesis test - whether the observed `z-score` was higher than the `z-score threshold`
+                 - `confidence` : If we reject the null hypothesis, and the `prediction` is "Watermarked", then we report 1 - `p value` to represent
+                 the confidence of the detection based on the unlikeliness of this `z-score` observation.
+                 """
+             )
+
+         # Register the main generation tab click; it outputs the generations as well as the encoded+redecoded+potentially truncated prompt and flag, then calls detection
+         generate_btn.click(fn=check_prompt_partial, inputs=[prompt, session_args, session_tokenizer], outputs=[redecoded_input, truncation_warning, session_args]).success(
+             fn=generate_partial, inputs=[redecoded_input, session_args, session_tokenizer], outputs=[output_without_watermark, output_with_watermark]).success(
+             fn=detect_partial, inputs=[output_without_watermark, session_args, session_tokenizer], outputs=[without_watermark_detection_result, session_args, session_tokenizer, html_without_watermark]).success(
+             fn=detect_partial, inputs=[output_with_watermark, session_args, session_tokenizer], outputs=[with_watermark_detection_result, session_args, session_tokenizer, html_with_watermark])
+         # Show the truncated version of the prompt if truncation occurred
+         redecoded_input.change(fn=truncate_prompt, inputs=[redecoded_input, truncation_warning, prompt, session_args], outputs=[prompt, session_args])
+         # Register the main detection tab click
+         detect_btn.click(fn=detect_partial, inputs=[detection_input, session_args, session_tokenizer], outputs=[detection_result, session_args, session_tokenizer, html_detection_input], api_name="detection")
+
+         # State management logic
+         # define update callbacks that change the state dict
+         def update_model(session_state, value): session_state.model_name_or_path = value; return session_state
+         def update_sampling_temp(session_state, value): session_state.sampling_temp = float(value); return session_state
+         def update_generation_seed(session_state, value): session_state.generation_seed = int(value); return session_state
+         def update_gamma(session_state, value): session_state.gamma = float(value); return session_state
+         def update_delta(session_state, value): session_state.delta = float(value); return session_state
+         def update_detection_z_threshold(session_state, value): session_state.detection_z_threshold = float(value); return session_state
+         def update_decoding(session_state, value):
+             if value == "multinomial":
+                 session_state.use_sampling = True
+             elif value == "greedy":
+                 session_state.use_sampling = False
+             return session_state
+         def toggle_sampling_vis(value):
+             if value == "multinomial":
+                 return gr.update(visible=True)
+             elif value == "greedy":
+                 return gr.update(visible=False)
+         def toggle_sampling_vis_inv(value):
+             if value == "multinomial":
+                 return gr.update(visible=False)
+             elif value == "greedy":
+                 return gr.update(visible=True)
+         # if the model name is in the list of API models, set the num beams parameter to 1 and hide n_beams
+         def toggle_vis_for_api_model(value):
+             if value in API_MODEL_MAP:
+                 return gr.update(visible=False)
+             else:
+                 return gr.update(visible=True)
+         def toggle_beams_for_api_model(value, orig_n_beams):
+             if value in API_MODEL_MAP:
+                 return gr.update(value=1)
+             else:
+                 return gr.update(value=orig_n_beams)
+         # if the model name is in the list of API models, set the interactive parameter to False
+         def toggle_interactive_for_api_model(value):
+             if value in API_MODEL_MAP:
+                 return gr.update(interactive=False)
+             else:
+                 return gr.update(interactive=True)
+         # if the model name is in the list of API models, set gamma and delta based on the API map
+         def toggle_gamma_for_api_model(value, orig_gamma):
+             if value in API_MODEL_MAP:
+                 return gr.update(value=API_MODEL_MAP[value]["gamma"])
+             else:
+                 return gr.update(value=orig_gamma)
+         def toggle_delta_for_api_model(value, orig_delta):
+             if value in API_MODEL_MAP:
+                 return gr.update(value=API_MODEL_MAP[value]["delta"])
+             else:
+                 return gr.update(value=orig_delta)
+
+         def update_n_beams(session_state, value): session_state.n_beams = value; return session_state
+         def update_max_new_tokens(session_state, value): session_state.max_new_tokens = int(value); return session_state
+         def update_ignore_repeated_bigrams(session_state, value): session_state.ignore_repeated_bigrams = value; return session_state
+         def update_normalizers(session_state, value): session_state.normalizers = value; return session_state
+         def update_seed_separately(session_state, value): session_state.seed_separately = value; return session_state
+         def update_select_green_tokens(session_state, value): session_state.select_green_tokens = value; return session_state
+         def update_tokenizer(model_name_or_path):
+             # if model_name_or_path == ALPACA_MODEL_NAME:
+             #     return ALPACA_MODEL_TOKENIZER.from_pretrained(ALPACA_TOKENIZER_PATH)
+             # else:
+             return AutoTokenizer.from_pretrained(model_name_or_path)
+
+         def check_model(value): return value if (value != "" and value is not None) else args.model_name_or_path
+         # enforce the constraint that the model cannot be null or empty,
+         # then attach the model callbacks in particular
+         model_selector.change(check_model, inputs=[model_selector], outputs=[model_selector]).then(
+             toggle_vis_for_api_model, inputs=[model_selector], outputs=[n_beams]
+         ).then(
+             toggle_beams_for_api_model, inputs=[model_selector, n_beams], outputs=[n_beams]
+         ).then(
+             toggle_interactive_for_api_model, inputs=[model_selector], outputs=[gamma]
+         ).then(
+             toggle_interactive_for_api_model, inputs=[model_selector], outputs=[delta]
+         ).then(
+             toggle_gamma_for_api_model, inputs=[model_selector, gamma], outputs=[gamma]
+         ).then(
+             toggle_delta_for_api_model, inputs=[model_selector, delta], outputs=[delta]
+         ).then(
+             update_tokenizer, inputs=[model_selector], outputs=[session_tokenizer]
+         ).then(
+             update_model, inputs=[session_args, model_selector], outputs=[session_args]
+         ).then(
+             lambda value: str(value), inputs=[session_args], outputs=[current_parameters]
+         )
+         # register callbacks for toggling the visibility of certain parameters based on the values of others
+         decoding.change(toggle_sampling_vis, inputs=[decoding], outputs=[sampling_temp])
+         decoding.change(toggle_sampling_vis, inputs=[decoding], outputs=[generation_seed])
+         decoding.change(toggle_sampling_vis_inv, inputs=[decoding], outputs=[n_beams])
+         decoding.change(toggle_vis_for_api_model, inputs=[model_selector], outputs=[n_beams])
+         # register all state update callbacks
+         decoding.change(update_decoding, inputs=[session_args, decoding], outputs=[session_args])
+         sampling_temp.change(update_sampling_temp, inputs=[session_args, sampling_temp], outputs=[session_args])
+         generation_seed.change(update_generation_seed, inputs=[session_args, generation_seed], outputs=[session_args])
+         n_beams.change(update_n_beams, inputs=[session_args, n_beams], outputs=[session_args])
+         max_new_tokens.change(update_max_new_tokens, inputs=[session_args, max_new_tokens], outputs=[session_args])
+         gamma.change(update_gamma, inputs=[session_args, gamma], outputs=[session_args])
+         delta.change(update_delta, inputs=[session_args, delta], outputs=[session_args])
+         detection_z_threshold.change(update_detection_z_threshold, inputs=[session_args, detection_z_threshold], outputs=[session_args])
+         ignore_repeated_bigrams.change(update_ignore_repeated_bigrams, inputs=[session_args, ignore_repeated_bigrams], outputs=[session_args])
+         normalizers.change(update_normalizers, inputs=[session_args, normalizers], outputs=[session_args])
+         seed_separately.change(update_seed_separately, inputs=[session_args, seed_separately], outputs=[session_args])
+         select_green_tokens.change(update_select_green_tokens, inputs=[session_args, select_green_tokens], outputs=[session_args])
+         # register an additional callback on button clicks that updates the shown parameters window
+         generate_btn.click(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
+         detect_btn.click(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
+         # When the parameters change, display the update and also fire detection, since some detection params don't change the model output.
+         delta.change(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
+         gamma.change(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
+         gamma.change(fn=detect_partial, inputs=[output_without_watermark, session_args, session_tokenizer], outputs=[without_watermark_detection_result, session_args, session_tokenizer, html_without_watermark])
+         gamma.change(fn=detect_partial, inputs=[output_with_watermark, session_args, session_tokenizer], outputs=[with_watermark_detection_result, session_args, session_tokenizer, html_with_watermark])
+         gamma.change(fn=detect_partial, inputs=[detection_input, session_args, session_tokenizer], outputs=[detection_result, session_args, session_tokenizer, html_detection_input])
+         detection_z_threshold.change(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
+         detection_z_threshold.change(fn=detect_partial, inputs=[output_without_watermark, session_args, session_tokenizer], outputs=[without_watermark_detection_result, session_args, session_tokenizer, html_without_watermark])
+         detection_z_threshold.change(fn=detect_partial, inputs=[output_with_watermark, session_args, session_tokenizer], outputs=[with_watermark_detection_result, session_args, session_tokenizer, html_with_watermark])
+         detection_z_threshold.change(fn=detect_partial, inputs=[detection_input, session_args, session_tokenizer], outputs=[detection_result, session_args, session_tokenizer, html_detection_input])
+         ignore_repeated_bigrams.change(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
+         ignore_repeated_bigrams.change(fn=detect_partial, inputs=[output_without_watermark, session_args, session_tokenizer], outputs=[without_watermark_detection_result, session_args, session_tokenizer, html_without_watermark])
+         ignore_repeated_bigrams.change(fn=detect_partial, inputs=[output_with_watermark, session_args, session_tokenizer], outputs=[with_watermark_detection_result, session_args, session_tokenizer, html_with_watermark])
+         ignore_repeated_bigrams.change(fn=detect_partial, inputs=[detection_input, session_args, session_tokenizer], outputs=[detection_result, session_args, session_tokenizer, html_detection_input])
+         normalizers.change(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
+         normalizers.change(fn=detect_partial, inputs=[output_without_watermark, session_args, session_tokenizer], outputs=[without_watermark_detection_result, session_args, session_tokenizer, html_without_watermark])
+         normalizers.change(fn=detect_partial, inputs=[output_with_watermark, session_args, session_tokenizer], outputs=[with_watermark_detection_result, session_args, session_tokenizer, html_with_watermark])
+         normalizers.change(fn=detect_partial, inputs=[detection_input, session_args, session_tokenizer], outputs=[detection_result, session_args, session_tokenizer, html_detection_input])
+         select_green_tokens.change(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
+         select_green_tokens.change(fn=detect_partial, inputs=[output_without_watermark, session_args, session_tokenizer], outputs=[without_watermark_detection_result, session_args, session_tokenizer, html_without_watermark])
+         select_green_tokens.change(fn=detect_partial, inputs=[output_with_watermark, session_args, session_tokenizer], outputs=[with_watermark_detection_result, session_args, session_tokenizer, html_with_watermark])
+         select_green_tokens.change(fn=detect_partial, inputs=[detection_input, session_args, session_tokenizer], outputs=[detection_result, session_args, session_tokenizer, html_detection_input])
+
+     demo.queue(concurrency_count=3)
+
+     if args.demo_public:
+         demo.launch(share=True)  # exposes the app to the internet via a randomly generated link
+     else:
+         demo.launch()
+
+ def main(args):
+     """Run a command line version of the generation and detection operations
+     and optionally launch and serve the gradio demo"""
+     # Initial arg processing and logging
+     args.normalizers = (args.normalizers.split(",") if args.normalizers else [])
+     print(args)
+
+     if not args.skip_model_load:
+         model, tokenizer, device = load_model(args)
+     else:
+         model, tokenizer, device = None, None, None
+         tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)
+         if args.use_gpu:
+             device = "cuda" if torch.cuda.is_available() else "cpu"
+         else:
+             device = "cpu"
+
+
+     # default prompt: an aardvark passage used as the text-completion example
+     input_text = (
+         "The aardvark is sometimes colloquially called the 'African ant bear',[6] 'anteater' (not to be confused with the South American anteater), or the 'Cape anteater'[6] after the Cape of Good Hope. The name 'aardvark' is Afrikaans (Afrikaans pronunciation: [ˈɑːrtfark]), comes from earlier Afrikaans erdvark[6] and means 'earth pig' or 'ground pig' (aarde: 'earth/ground', vark: 'pig'), because of its burrowing habits.[7][8][9] The name Orycteropus means 'burrowing foot', and the name afer refers to Africa.[10] The name of the aardvark's order, Tubulidentata, comes from the tubule-style teeth.[11]\n\nThe aardvark is not closely related to the pig; rather, it is the sole extant representative of the obscure mammalian order Tubulidentata,[10] in which it is usually considered to form one variable species of the genus Orycteropus, the sole surviving genus in the family Orycteropodidae. The aardvark is"
+     )
+
+     args.default_prompt = input_text
+
+
+     # Generate and detect, report to stdout
+     if not args.skip_model_load:
+
+         term_width = 80
+         print("#" * term_width)
+         print("Prompt:")
+         print(input_text)
+
+         # a generator that yields (without_watermark, with_watermark) pairs
+         generator_outputs = generate(input_text,
+                                      args,
+                                      model=model,
+                                      device=device,
+                                      tokenizer=tokenizer)
+         # we need to iterate over it,
+         # but we only want the last output in this case
+         for out in generator_outputs:
+             decoded_output_without_watermark = out[0]
+             decoded_output_with_watermark = out[1]
+
+         without_watermark_detection_result = detect(decoded_output_without_watermark,
+                                                     args,
+                                                     device=device,
+                                                     tokenizer=tokenizer,
+                                                     return_green_token_mask=False)
+         with_watermark_detection_result = detect(decoded_output_with_watermark,
+                                                  args,
+                                                  device=device,
+                                                  tokenizer=tokenizer,
+                                                  return_green_token_mask=False)
+
+         print("#" * term_width)
+         print("Output without watermark:")
+         print(decoded_output_without_watermark)
+         print("-" * term_width)
+         print(f"Detection result @ {args.detection_z_threshold}:")
+         pprint(without_watermark_detection_result)
+         print("-" * term_width)
+
+         print("#" * term_width)
+         print("Output with watermark:")
+         print(decoded_output_with_watermark)
+         print("-" * term_width)
+         print(f"Detection result @ {args.detection_z_threshold}:")
+         pprint(with_watermark_detection_result)
+         print("-" * term_width)
+
+
+     # Launch the app to generate and detect interactively (implements the hf space demo)
+     if args.run_gradio:
+         run_gradio(args, model=model, tokenizer=tokenizer, device=device)
+
+     return
+
+ if __name__ == "__main__":
+
+     args = parse_args()
+     print(args)
+
+     main(args)
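The metrics panel above describes the detection test in prose. For intuition, here is a minimal sketch of the one-proportion z-test it describes, assuming `WatermarkDetector` follows the statistic from the accompanying paper (`scipy` is an assumed dependency here, used only for the p-value):

```python
# Sketch of the detection statistic: with T scored tokens, g of which fall in
# their greenlists, and greenlist fraction gamma, the null hypothesis
# (human/unwatermarked text) expects g to be roughly gamma * T.
from math import sqrt

from scipy.stats import norm  # assumption: scipy is available


def z_score(green_count: int, num_tokens_scored: int, gamma: float) -> float:
    """One-proportion z-statistic for the observed greenlist fraction."""
    expected = gamma * num_tokens_scored
    std = sqrt(num_tokens_scored * gamma * (1 - gamma))
    return (green_count - expected) / std


# Example: 100 scored tokens, 60 of them green, gamma = 0.25
z = z_score(60, 100, 0.25)   # ~8.08, far above the default 4.0 threshold
p_value = norm.sf(z)         # P(Z >= z) under the null hypothesis
prediction = z > 4.0         # reported as "Watermarked" in the demo
```

This is why even modest-length watermarked outputs are detected reliably: at gamma = 0.25, a run of tokens only needs to be moderately enriched in green tokens before the z-score clears the threshold.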
homoglyph_data/__init__.py ADDED
@@ -0,0 +1,40 @@
+ # This is data for homoglyph finding
+
+ """Original package info:
+
+ Homoglyphs
+ * Get similar letters
+ * Convert string to ASCII letters
+ * Detect possible letter languages
+ * Detect letter UTF-8 group.
+
+ # main package info
+ __title__ = 'Homoglyphs'
+ __version__ = '2.0.4'
+ __author__ = 'Gram Orsinium'
+ __license__ = 'MIT'
+
+ # License:
+
+ MIT License 2019 orsinium <master_fess@mail.ru>
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice (including the next
+ paragraph) shall be included in all copies or substantial portions of the
+ Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
+ """
homoglyph_data/categories.json ADDED
The diff for this file is too large to render.
homoglyph_data/confusables_sept2022.json ADDED
The diff for this file is too large to render.
homoglyph_data/languages.json ADDED
@@ -0,0 +1,34 @@
+ {
+     "ar": "ءآأؤإئابةتثجحخدذرزسشصضطظعغػؼؽؾؿـفقكلمنهوىيًٌٍَُِّ",
+     "be": "ʼЁІЎАБВГДЕЖЗЙКЛМНОПРСТУФХЦЧШЫЬЭЮЯабвгдежзйклмнопрстуфхцчшыьэюяёіў",
+     "bg": "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯабвгдежзийклмнопрстуфхцчшщъьюя",
+     "ca": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÀÈÉÍÏÒÓÚÜÇàèéíïòóúüç·",
+     "cz": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÁÉÍÓÚÝáéíóúýČčĎďĚěŇňŘřŠšŤťŮůŽž",
+     "da": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÅÆØåæø",
+     "de": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÄÖÜßäöü",
+     "el": "ΪΫΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΐΰϊϋάέήίαβγδεζηθικλμνξοπρςστυφχψωόύώ",
+     "en": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
+     "eo": "ABCDEFGHIJKLMNOPRSTUVZabcdefghijklmnoprstuvzĈĉĜĝĤĥĴĵŜŝŬŭ",
+     "es": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÁÉÍÑÓÚÜáéíñóúü",
+     "et": "ABDEGHIJKLMNOPRSTUVabdeghijklmnoprstuvÄÕÖÜäõöü",
+     "fi": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÄÅÖäåöŠšŽž",
+     "fr": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÀÂÇÈÉÊÎÏÙÛàâçèéêîïùûŒœ",
+     "he": "אבגדהוזחטיךכלםמןנסעףפץצקרשתװױײ",
+     "hr": "ABCDEFGHIJKLMNOPRSTUVZabcdefghijklmnoprstuvzĆćČčĐ𩹮ž",
+     "hu": "ABCDEFGHIJKLMNOPRSTUVZabcdefghijklmnoprstuvzÁÉÍÓÖÚÜáéíóöúüŐőŰű",
+     "it": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÀÈÉÌÒÓÙàèéìòóù",
+     "lt": "ABCDEFGHIJKLMNOPRSTUVYZabcdefghijklmnoprstuvyzĄąČčĖėĘęĮįŠšŪūŲųŽž",
+     "lv": "ABCDEFGHIJKLMNOPRSTUVZabcdefghijklmnoprstuvzĀāČčĒēĢģĪīĶķĻļŅņŠšŪūŽž",
+     "mk": "ЃЅЈЉЊЌЏАБВГДЕЖЗИКЛМНОПРСТУФХЦЧШабвгдежзиклмнопрстуфхцчшѓѕјљњќџ",
+     "nl": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
+     "pl": "ABCDEFGHIJKLMNOPRSTUWYZabcdefghijklmnoprstuwyzÓóĄąĆćĘꣳŃńŚśŹźŻż",
+     "pt": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÀÁÂÃÇÉÊÍÓÔÕÚàáâãçéêíóôõú",
+     "ro": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÂÎâîĂăȘșȚț",
+     "ru": "ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё",
+     "sk": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÁÄÉÍÓÔÚÝáäéíóôúýČčĎďĹ弾ŇňŔ੹ŤťŽž",
+     "sl": "ABCDEFGHIJKLMNOPRSTUVZabcdefghijklmnoprstuvzČ芚Žž",
+     "sr": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzЂЈЉЊЋЏАБВГДЕЖЗИКЛМНОПРСТУФХЦЧШабвгдежзиклмнопрстуфхцчшђјљњћџ",
+     "th": "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛",
+     "tr": "ABCDEFGHIJKLMNOPRSTUVYZabcdefghijklmnoprstuvyzÂÇÎÖÛÜâçîöûüĞğİıŞş",
+     "vi": "ABCDEGHIKLMNOPQRSTUVXYabcdeghiklmnopqrstuvxyÂÊÔâêôĂăĐđƠơƯư"
+ }
homoglyphs.py ADDED
@@ -0,0 +1,265 @@
+ """Updated version of core.py from
+ https://github.com/yamatt/homoglyphs/tree/main/homoglyphs_fork
+ for modern python3
+ """
+
+ from collections import defaultdict
+ import json
+ from itertools import product
+ import os
+ import unicodedata
+
+ # Actions if char not in alphabet
+ STRATEGY_LOAD = 1  # load category for this char
+ STRATEGY_IGNORE = 2  # add char to result
+ STRATEGY_REMOVE = 3  # remove char from result
+
+ ASCII_RANGE = range(128)
+
+
+ CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
+ DATA_LOCATION = os.path.join(CURRENT_DIR, "homoglyph_data")
+
+
+ class Categories:
+     """
+     Work with aliases from ISO 15924.
+     https://en.wikipedia.org/wiki/ISO_15924#List_of_codes
+     """
+
+     fpath = os.path.join(DATA_LOCATION, "categories.json")
+
+     @classmethod
+     def _get_ranges(cls, categories):
+         """
+         :return: iter: (start code, end code)
+         :rtype: list
+         """
+         with open(cls.fpath, encoding="utf-8") as f:
+             data = json.load(f)
+
+         for category in categories:
+             if category not in data["aliases"]:
+                 raise ValueError("Invalid category: {}".format(category))
+
+         for point in data["points"]:
+             if point[2] in categories:
+                 yield point[:2]
+
+     @classmethod
+     def get_alphabet(cls, categories):
+         """
+         :return: set of chars in alphabet by categories list
+         :rtype: set
+         """
+         alphabet = set()
+         for start, end in cls._get_ranges(categories):
+             chars = (chr(code) for code in range(start, end + 1))
+             alphabet.update(chars)
+         return alphabet
+
+     @classmethod
+     def detect(cls, char):
+         """
+         :return: category
+         :rtype: str
+         """
+         with open(cls.fpath, encoding="utf-8") as f:
+             data = json.load(f)
+
+         # try to detect the category via unicodedata
+         try:
+             category = unicodedata.name(char).split()[0]
+         except (TypeError, ValueError):
+             # In Python 2, unicodedata.name raises an error for non-unicode chars;
+             # Python 3 raises ValueError for such characters
+             pass
+         else:
+             if category in data["aliases"]:
+                 return category
+
+         # try to detect the category via the ranges from the JSON file.
+         code = ord(char)
+         for point in data["points"]:
+             if point[0] <= code <= point[1]:
+                 return point[2]
+
+     @classmethod
+     def get_all(cls):
+         with open(cls.fpath, encoding="utf-8") as f:
+             data = json.load(f)
+         return set(data["aliases"])
+
+
+ class Languages:
+     fpath = os.path.join(DATA_LOCATION, "languages.json")
+
+     @classmethod
+     def get_alphabet(cls, languages):
+         """
+         :return: set of chars in alphabet by languages list
+         :rtype: set
+         """
+         with open(cls.fpath, encoding="utf-8") as f:
+             data = json.load(f)
+         alphabet = set()
+         for lang in languages:
+             if lang not in data:
+                 raise ValueError("Invalid language code: {}".format(lang))
+             alphabet.update(data[lang])
+         return alphabet
+
+     @classmethod
+     def detect(cls, char):
+         """
+         :return: set of languages which alphabet contains passed char.
+         :rtype: set
+         """
+         with open(cls.fpath, encoding="utf-8") as f:
+             data = json.load(f)
+         languages = set()
+         for lang, alphabet in data.items():
+             if char in alphabet:
+                 languages.add(lang)
+         return languages
+
+     @classmethod
+     def get_all(cls):
+         with open(cls.fpath, encoding="utf-8") as f:
+             data = json.load(f)
+         return set(data.keys())
+
+
+ class Homoglyphs:
+     def __init__(
+         self,
+         categories=None,
+         languages=None,
+         alphabet=None,
+         strategy=STRATEGY_IGNORE,
+         ascii_strategy=STRATEGY_IGNORE,
+         ascii_range=ASCII_RANGE,
+     ):
+         # strategies
+         if strategy not in (STRATEGY_LOAD, STRATEGY_IGNORE, STRATEGY_REMOVE):
+             raise ValueError("Invalid strategy")
+         self.strategy = strategy
+         self.ascii_strategy = ascii_strategy
+         self.ascii_range = ascii_range
+
+         # Homoglyphs must be initialized with some alphabet to work correctly
+         if not categories and not languages and not alphabet:
+             categories = ("LATIN", "COMMON")
+
+         # cats and langs
+         self.categories = set(categories or [])
+         self.languages = set(languages or [])
+
+         # alphabet
+         self.alphabet = set(alphabet or [])
+         if self.categories:
+             alphabet = Categories.get_alphabet(self.categories)
+             self.alphabet.update(alphabet)
+         if self.languages:
+             alphabet = Languages.get_alphabet(self.languages)
+             self.alphabet.update(alphabet)
+         self.table = self.get_table(self.alphabet)
+
+     @staticmethod
+     def get_table(alphabet):
+         table = defaultdict(set)
+         with open(os.path.join(DATA_LOCATION, "confusables_sept2022.json")) as f:
+             data = json.load(f)
+         for char in alphabet:
+             if char in data:
+                 for homoglyph in data[char]:
+                     if homoglyph in alphabet:
+                         table[char].add(homoglyph)
+         return table
+
+     @staticmethod
+     def get_restricted_table(source_alphabet, target_alphabet):
+         table = defaultdict(set)
+         with open(os.path.join(DATA_LOCATION, "confusables_sept2022.json")) as f:
+             data = json.load(f)
+         for char in source_alphabet:
+             if char in data:
+                 for homoglyph in data[char]:
+                     if homoglyph in target_alphabet:
+                         table[char].add(homoglyph)
+         return table
+
+     @staticmethod
+     def uniq_and_sort(data):
+         result = list(set(data))
+         result.sort(key=lambda x: (-len(x), x))
+         return result
+
+     def _update_alphabet(self, char):
+         # try to detect languages
+         langs = Languages.detect(char)
+         if langs:
+             self.languages.update(langs)
+             alphabet = Languages.get_alphabet(langs)
+             self.alphabet.update(alphabet)
+         else:
+             # try to detect categories
+             category = Categories.detect(char)
+             if category is None:
+                 return False
+             self.categories.add(category)
+             alphabet = Categories.get_alphabet([category])
+             self.alphabet.update(alphabet)
+         # update table for new alphabet
+         self.table = self.get_table(self.alphabet)
+         return True
+
+     def _get_char_variants(self, char):
+         if char not in self.alphabet:
+             if self.strategy == STRATEGY_LOAD:
+                 if not self._update_alphabet(char):
+                     return []
+             elif self.strategy == STRATEGY_IGNORE:
+                 return [char]
+             elif self.strategy == STRATEGY_REMOVE:
+                 return []
+
+         # find alternative chars for current char
+         alt_chars = self.table.get(char, set())
+         if alt_chars:
+             # find alternative chars for alternative chars for current char
+             alt_chars2 = [self.table.get(alt_char, set()) for alt_char in alt_chars]
+             # combine all alternatives
+             alt_chars.update(*alt_chars2)
+         # add current char to alternatives
+         alt_chars.add(char)
+
+         # uniq, sort and return
+         return self.uniq_and_sort(alt_chars)
+
+     def _get_combinations(self, text, ascii=False):
+         variations = []
+         for char in text:
+             alt_chars = self._get_char_variants(char)
+
+             if ascii:
+                 alt_chars = [char for char in alt_chars if ord(char) in self.ascii_range]
+                 if not alt_chars and self.ascii_strategy == STRATEGY_IGNORE:
+                     return
+
+             if alt_chars:
+                 variations.append(alt_chars)
+         if variations:
+             for variant in product(*variations):
+                 yield "".join(variant)
+
+     def get_combinations(self, text):
+         return list(self._get_combinations(text))
+
+     def _to_ascii(self, text):
+         for variant in self._get_combinations(text, ascii=True):
+             if max(map(ord, variant)) in self.ascii_range:
+                 yield variant
+
+     def to_ascii(self, text):
+         return self.uniq_and_sort(self._to_ascii(text))
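
A minimal usage sketch of the Homoglyphs class above (the inputs are illustrative assumptions, not taken from the repo):

    from homoglyphs import Homoglyphs, STRATEGY_LOAD

    hg = Homoglyphs(categories=("LATIN", "COMMON"), strategy=STRATEGY_LOAD)
    # enumerate confusable respellings of a short string
    variants = hg.get_combinations("abc")
    # project a string containing Cyrillic look-alikes onto ASCII candidates
    ascii_forms = hg.to_ascii("рaypal")  # leading 'р' is Cyrillic U+0440

With STRATEGY_LOAD, characters outside the initial LATIN/COMMON alphabet trigger loading of their detected language/category tables before variants are computed.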
normalizers.py ADDED
@@ -0,0 +1,202 @@
+ """ Text-based normalizers, used to mitigate simple attacks against watermarking.
+
+ This implementation is unlikely to be a complete list of all possible exploits within the unicode standard;
+ it represents our best effort at the time of writing.
+
+ These normalizers can be used as stand-alone normalizers. They could be made to conform to the HF tokenizers standard, but that would
+ require messing with the limited rust interface of tokenizers.NormalizedString
+ """
+ from collections import defaultdict
+ from functools import cache
+
+ import re
+ import unicodedata
+ import homoglyphs as hg
+
+
+ def normalization_strategy_lookup(strategy_name: str) -> object:
+     if strategy_name == "unicode":
+         return UnicodeSanitizer()
+     elif strategy_name == "homoglyphs":
+         return HomoglyphCanonizer()
+     elif strategy_name == "truecase":
+         return TrueCaser()
+
+
+ class HomoglyphCanonizer:
+     """Attempts to detect homoglyph attacks and find a consistent canon.
+
+     This function does so on a per-ISO-category level. Language-level would also be possible (see commented code).
+     """
+
+     def __init__(self):
+         self.homoglyphs = None
+
+     def __call__(self, homoglyphed_str: str) -> str:
+         # find canon:
+         target_category, all_categories = self._categorize_text(homoglyphed_str)
+         homoglyph_table = self._select_canon_category_and_load(target_category, all_categories)
+         return self._sanitize_text(target_category, homoglyph_table, homoglyphed_str)
+
+     def _categorize_text(self, text: str) -> dict:
+         iso_categories = defaultdict(int)
+         # self.iso_languages = defaultdict(int)
+
+         for char in text:
+             iso_categories[hg.Categories.detect(char)] += 1
+             # for lang in hg.Languages.detect(char):
+             #     self.iso_languages[lang] += 1
+         target_category = max(iso_categories, key=iso_categories.get)
+         all_categories = tuple(iso_categories)
+         return target_category, all_categories
+
+     @cache
+     def _select_canon_category_and_load(self, target_category: str, all_categories: tuple[str]) -> dict:
+         homoglyph_table = hg.Homoglyphs(categories=(target_category, "COMMON"))  # alphabet loaded here from file
+
+         source_alphabet = hg.Categories.get_alphabet(all_categories)
+         restricted_table = homoglyph_table.get_restricted_table(source_alphabet, homoglyph_table.alphabet)  # table loaded here from file
+         return restricted_table
+
+     def _sanitize_text(self, target_category: str, homoglyph_table: dict, homoglyphed_str: str) -> str:
+         sanitized_text = ""
+         for char in homoglyphed_str:
+             # langs = hg.Languages.detect(char)
+             cat = hg.Categories.detect(char)
+             if target_category in cat or "COMMON" in cat or len(cat) == 0:
+                 sanitized_text += char
+             else:
+                 sanitized_text += list(homoglyph_table[char])[0]
+         return sanitized_text
+
+
+ class UnicodeSanitizer:
+     """Regex-based unicode sanitizer. Has different levels of granularity.
+
+     * ruleset="whitespaces" - attempts to remove only whitespace unicode characters
+     * ruleset="IDN.blacklist" - does its best to remove unusual unicode based on Network.IDN.blacklist characters
+     * ruleset="ascii" - brute-forces all text into ascii
+
+     This is unlikely to be a comprehensive list.
+
+     You can find a more comprehensive discussion at https://www.unicode.org/reports/tr36/
+     and https://www.unicode.org/faq/security.html
+     """
+
+     def __init__(self, ruleset="whitespaces"):
+         if ruleset == "whitespaces":
+
+             """Documentation:
+             \u00A0: Non-breaking space
+             \u1680: Ogham space mark
+             \u180E: Mongolian vowel separator
+             \u2000-\u200B: Various space characters, including en space, em space, thin space, hair space, zero-width space, and zero-width non-joiner
+             \u200C\u200D: Zero-width non-joiner and zero-width joiner
+             \u200E,\u200F: Left-to-right-mark, Right-to-left-mark
+             \u2060: Word joiner
+             \u2063: Invisible separator
+             \u202F: Narrow non-breaking space
+             \u205F: Medium mathematical space
+             \u3000: Ideographic space
+             \uFEFF: Zero-width non-breaking space
+             \uFFA0: Halfwidth hangul filler
+             \uFFF9\uFFFA\uFFFB: Interlinear annotation characters
+             \uFE00-\uFE0F: Variation selectors
+             \u202A-\u202F: Embedding characters
+             \u3164: Korean hangul filler.
+
+             Note that these characters are not always superfluous whitespace characters!
+             """
+
+             self.pattern = re.compile(
+                 r"[\u00A0\u1680\u180E\u2000-\u200B\u200C\u200D\u200E\u200F\u2060\u2063\u202F\u205F\u3000\uFEFF\uFFA0\uFFF9\uFFFA\uFFFB"
+                 r"\uFE00\uFE01\uFE02\uFE03\uFE04\uFE05\uFE06\uFE07\uFE08\uFE09\uFE0A\uFE0B\uFE0C\uFE0D\uFE0E\uFE0F\u3164\u202A\u202B\u202C\u202D"
+                 r"\u202E\u202F]"
+             )
+         elif ruleset == "IDN.blacklist":
+
+             """Documentation:
+             [\u00A0\u1680\u180E\u2000-\u200B\u202F\u205F\u2060\u2063\uFEFF]: Matches any whitespace characters in the Unicode character
+             set that are included in the IDN blacklist.
+             \uFFF9-\uFFFB: Matches characters that are not defined in Unicode but are used as language tags in various legacy encodings.
+             These characters are not allowed in domain names.
+             \uD800-\uDB7F: Matches the first part of a surrogate pair. Surrogate pairs are used to represent characters in the Unicode character
+             set that cannot be represented by a single 16-bit value. The first part of a surrogate pair is in the range U+D800 to U+DBFF,
+             and the second part is in the range U+DC00 to U+DFFF.
+             \uDB80-\uDBFF][\uDC00-\uDFFF]?: Matches the second part of a surrogate pair. The second part of a surrogate pair is in the range U+DC00
+             to U+DFFF, and is optional.
+             [\uDB40\uDC20-\uDB40\uDC7F][\uDC00-\uDFFF]: Matches certain invalid UTF-16 sequences which should not appear in IDNs.
+             """
+
+             self.pattern = re.compile(
+                 r"[\u00A0\u1680\u180E\u2000-\u200B\u202F\u205F\u2060\u2063\uFEFF\uFFF9-\uFFFB\uD800-\uDB7F\uDB80-\uDBFF]"
+                 r"[\uDC00-\uDFFF]?|[\uDB40\uDC20-\uDB40\uDC7F][\uDC00-\uDFFF]"
+             )
+         else:
+             """Documentation:
+             This is a simple restriction to "no-unicode", using only ascii characters. Control characters are included.
+             """
+             self.pattern = re.compile(r"[^\x00-\x7F]+")
+
+     def __call__(self, text: str) -> str:
+         text = unicodedata.normalize("NFC", text)  # canon forms
+         text = self.pattern.sub(" ", text)  # pattern match
+         text = re.sub(" +", " ", text)  # collapse whitespaces
+         text = "".join(c for c in text if unicodedata.category(c) != "Cc")  # Remove any remaining non-printable characters
+         return text
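+         # Illustrative example (editorial note, not part of the committed file):
+         # with the default "whitespaces" ruleset, a zero-width space and a
+         # non-breaking space are both replaced by plain spaces and collapsed:
+         #   UnicodeSanitizer()("hello\u200bworld\u00a0!") -> "hello world !"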
+
+
+ class TrueCaser:
+     """True-casing is a capitalization normalization that returns text to its original capitalization.
+
+     This defends against attacks that wRIte TeXt lIkE spOngBoB.
+
+     Here, a simple POS-tagger is used.
+     """
+
+     uppercase_pos = ["PROPN"]  # Name POS tags that should be upper-cased
+
+     def __init__(self, backend="spacy"):
+         if backend == "spacy":
+             spacy_model = "en_core_web_sm"
+             try:
+                 import spacy
+                 self.nlp = spacy.load(spacy_model)
+             except OSError:  # model not downloaded yet
+                 import spacy.cli
+                 spacy.cli.download(spacy_model)
+                 import spacy
+                 self.nlp = spacy.load(spacy_model)
+
+             self.normalize_fn = self._spacy_truecasing
+         else:
+             from nltk import pos_tag, word_tokenize  # noqa
+             import nltk
+
+             nltk.download("punkt")
+             nltk.download("averaged_perceptron_tagger")
+             nltk.download("universal_tagset")
+             self.normalize_fn = self._nltk_truecasing
+
+     def __call__(self, random_capitalized_string: str) -> str:
+         truecased_str = self.normalize_fn(random_capitalized_string)
+         return truecased_str
+
+     def _spacy_truecasing(self, random_capitalized_string: str):
+         doc = self.nlp(random_capitalized_string.lower())
+         POS = self.uppercase_pos
+         truecased_str = "".join([w.text_with_ws.capitalize() if w.pos_ in POS or w.is_sent_start else w.text_with_ws for w in doc])
+         return truecased_str
+
+     def _nltk_truecasing(self, random_capitalized_string: str):
+         from nltk import pos_tag, word_tokenize
+         import nltk
+
+         nltk.download("punkt")
+         nltk.download("averaged_perceptron_tagger")
+         nltk.download("universal_tagset")
+         POS = ["NNP", "NNPS"]
+
+         tagged_text = pos_tag(word_tokenize(random_capitalized_string.lower()))
+         truecased_str = " ".join([w.capitalize() if p in POS else w for (w, p) in tagged_text])
+         return truecased_str
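
A minimal sketch of how these normalizers are chained in practice, mirroring the loop in WatermarkDetector.detect further below (the sample string is an illustrative assumption):

    from normalizers import normalization_strategy_lookup

    normalizers = [normalization_strategy_lookup(name)
                   for name in ["unicode", "homoglyphs", "truecase"]]
    text = "sOmE aTTaCkEd TeXt with weird\u2004spaces"
    for normalizer in normalizers:
        text = normalizer(text)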
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ spacy
+ nltk
+ scipy
+ torch
+ datasets
+ transformers
+ tokenizers
+ accelerate
+ text-generation>=0.3.1
+ gradio>=3.21.0
watermark_processor.py ADDED
@@ -0,0 +1,279 @@
+ # coding=utf-8
+ # Copyright 2023 Anonymous Authors of "A Watermark for Large Language Models"
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from __future__ import annotations
+ import collections
+ from math import sqrt
+
+ import scipy.stats
+
+ import torch
+ from torch import Tensor
+ from tokenizers import Tokenizer
+ from transformers import LogitsProcessor
+
+ from nltk.util import ngrams
+
+ from normalizers import normalization_strategy_lookup
+
+ class WatermarkBase:
+     def __init__(
+         self,
+         vocab: list[int] = None,
+         gamma: float = 0.5,
+         delta: float = 2.0,
+         seeding_scheme: str = "simple_1",  # mostly unused/always default
+         hash_key: int = 15485863,  # just a large prime number to create a rng seed with sufficient bit width
+         select_green_tokens: bool = True,
+     ):
+
+         # watermarking parameters
+         self.vocab = vocab
+         self.vocab_size = len(vocab)
+         self.gamma = gamma
+         self.delta = delta
+         self.seeding_scheme = seeding_scheme
+         self.rng = None
+         self.hash_key = hash_key
+         self.select_green_tokens = select_green_tokens
+
+     def _seed_rng(self, input_ids: torch.LongTensor, seeding_scheme: str = None) -> None:
+         # can optionally override the seeding scheme,
+         # but uses the instance attr by default
+         if seeding_scheme is None:
+             seeding_scheme = self.seeding_scheme
+
+         if seeding_scheme == "simple_1":
+             assert input_ids.shape[-1] >= 1, f"seeding_scheme={seeding_scheme} requires at least a 1 token prefix sequence to seed rng"
+             prev_token = input_ids[-1].item()
+             self.rng.manual_seed(self.hash_key * prev_token)
+         else:
+             raise NotImplementedError(f"Unexpected seeding_scheme: {seeding_scheme}")
+         return
+
+     def _get_greenlist_ids(self, input_ids: torch.LongTensor) -> list[int]:
+         # seed the rng using the previous tokens/prefix
+         # according to the seeding_scheme
+         self._seed_rng(input_ids)
+
+         greenlist_size = int(self.vocab_size * self.gamma)
+         vocab_permutation = torch.randperm(self.vocab_size, device=input_ids.device, generator=self.rng)
+         if self.select_green_tokens:  # directly
+             greenlist_ids = vocab_permutation[:greenlist_size]  # new
+         else:  # select green via red
+             greenlist_ids = vocab_permutation[(self.vocab_size - greenlist_size) :]  # legacy behavior
+         return greenlist_ids
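+         # Illustrative note (editorial, not part of the committed file): for a
+         # GPT-2-sized vocabulary of 50,257 tokens and gamma=0.5, each prefix
+         # token seeds a fresh permutation and the first int(50257 * 0.5) = 25128
+         # entries become the greenlist for the next position.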
+
+
+ class WatermarkLogitsProcessor(WatermarkBase, LogitsProcessor):
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+
+     def _calc_greenlist_mask(self, scores: torch.FloatTensor, greenlist_token_ids) -> torch.BoolTensor:
+         # TODO lets see if we can lose this loop
+         green_tokens_mask = torch.zeros_like(scores)
+         for b_idx in range(len(greenlist_token_ids)):
+             green_tokens_mask[b_idx][greenlist_token_ids[b_idx]] = 1
+         final_mask = green_tokens_mask.bool()
+         return final_mask
+
+     def _bias_greenlist_logits(self, scores: torch.Tensor, greenlist_mask: torch.Tensor, greenlist_bias: float) -> torch.Tensor:
+         scores[greenlist_mask] = scores[greenlist_mask] + greenlist_bias
+         return scores
+
+     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
+
+         # this is lazy to allow us to colocate on the watermarked model's device
+         if self.rng is None:
+             self.rng = torch.Generator(device=input_ids.device)
+
+         # NOTE, it would be nice to get rid of this batch loop, but currently,
+         # the seed and partition operations are not tensor/vectorized, thus
+         # each sequence in the batch needs to be treated separately.
+         batched_greenlist_ids = [None for _ in range(input_ids.shape[0])]
+
+         for b_idx in range(input_ids.shape[0]):
+             greenlist_ids = self._get_greenlist_ids(input_ids[b_idx])
+             batched_greenlist_ids[b_idx] = greenlist_ids
+
+         green_tokens_mask = self._calc_greenlist_mask(scores=scores, greenlist_token_ids=batched_greenlist_ids)
+
+         scores = self._bias_greenlist_logits(scores=scores, greenlist_mask=green_tokens_mask, greenlist_bias=self.delta)
+         return scores
+
+
+ class WatermarkDetector(WatermarkBase):
+     def __init__(
+         self,
+         *args,
+         device: torch.device = None,
+         tokenizer: Tokenizer = None,
+         z_threshold: float = 4.0,
+         normalizers: list[str] = ["unicode"],  # or also: ["unicode", "homoglyphs", "truecase"]
+         ignore_repeated_bigrams: bool = False,
+         **kwargs,
+     ):
+         super().__init__(*args, **kwargs)
+         # also configure the metrics returned/preprocessing options
+         assert device, "Must pass device"
+         assert tokenizer, "Need an instance of the generating tokenizer to perform detection"
+
+         self.tokenizer = tokenizer
+         self.device = device
+         self.z_threshold = z_threshold
+         self.rng = torch.Generator(device=self.device)
+
+         if self.seeding_scheme == "simple_1":
+             self.min_prefix_len = 1
+         else:
+             raise NotImplementedError(f"Unexpected seeding_scheme: {self.seeding_scheme}")
+
+         self.normalizers = []
+         for normalization_strategy in normalizers:
+             self.normalizers.append(normalization_strategy_lookup(normalization_strategy))
+
+         self.ignore_repeated_bigrams = ignore_repeated_bigrams
+         if self.ignore_repeated_bigrams:
+             assert self.seeding_scheme == "simple_1", "No repeated bigram credit variant assumes the single token seeding scheme."
+
+     def _compute_z_score(self, observed_count, T):
+         # count refers to number of green tokens, T is total number of tokens
+         expected_count = self.gamma
+         numer = observed_count - expected_count * T
+         denom = sqrt(T * expected_count * (1 - expected_count))
+         z = numer / denom
+         return z
+
+     def _compute_p_value(self, z):
+         p_value = scipy.stats.norm.sf(z)
+         return p_value
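+     # Worked example (editorial, not part of the committed file): with
+     # gamma=0.5 and T=200 scored tokens, observing 130 green tokens gives
+     #   z = (130 - 0.5*200) / sqrt(200 * 0.5 * 0.5) = 30 / sqrt(50) ≈ 4.24,
+     # which exceeds the default z_threshold=4.0 and would be flagged.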
+
+     def _score_sequence(
+         self,
+         input_ids: Tensor,
+         return_num_tokens_scored: bool = True,
+         return_num_green_tokens: bool = True,
+         return_green_fraction: bool = True,
+         return_green_token_mask: bool = False,
+         return_z_score: bool = True,
+         return_p_value: bool = True,
+     ):
+         if self.ignore_repeated_bigrams:
+             # Method that only counts a green/red hit once per unique bigram.
+             # New num total tokens scored (T) becomes the number of unique bigrams.
+             # We iterate over all unique token bigrams in the input, computing the greenlist
+             # induced by the first token in each, and then checking whether the second
+             # token falls in that greenlist.
+             assert return_green_token_mask == False, "Can't return the green/red mask when ignoring repeats."
+             bigram_table = {}
+             token_bigram_generator = ngrams(input_ids.cpu().tolist(), 2)
+             freq = collections.Counter(token_bigram_generator)
+             num_tokens_scored = len(freq.keys())
+             for idx, bigram in enumerate(freq.keys()):
+                 prefix = torch.tensor([bigram[0]], device=self.device)  # expects a 1-d prefix tensor on the randperm device
+                 greenlist_ids = self._get_greenlist_ids(prefix)
+                 bigram_table[bigram] = True if bigram[1] in greenlist_ids else False
+             green_token_count = sum(bigram_table.values())
+         else:
+             num_tokens_scored = len(input_ids) - self.min_prefix_len
+             if num_tokens_scored < 1:
+                 raise ValueError((f"Must have at least {1} token to score after "
+                                   f"the first min_prefix_len={self.min_prefix_len} tokens required by the seeding scheme."))
+             # Standard method.
+             # Since we generally need at least 1 token (for the simplest scheme)
+             # we start the iteration over the token sequence with a minimum
+             # num tokens as the first prefix for the seeding scheme,
+             # and at each step, compute the greenlist induced by the
+             # current prefix and check if the current token falls in the greenlist.
+             green_token_count, green_token_mask = 0, []
+             for idx in range(self.min_prefix_len, len(input_ids)):
+                 curr_token = input_ids[idx]
+                 greenlist_ids = self._get_greenlist_ids(input_ids[:idx])
+                 if curr_token in greenlist_ids:
+                     green_token_count += 1
+                     green_token_mask.append(True)
+                 else:
+                     green_token_mask.append(False)
+
+         score_dict = dict()
+         if return_num_tokens_scored:
+             score_dict.update(dict(num_tokens_scored=num_tokens_scored))
+         if return_num_green_tokens:
+             score_dict.update(dict(num_green_tokens=green_token_count))
+         if return_green_fraction:
+             score_dict.update(dict(green_fraction=(green_token_count / num_tokens_scored)))
+         if return_z_score:
+             score_dict.update(dict(z_score=self._compute_z_score(green_token_count, num_tokens_scored)))
+         if return_p_value:
+             z_score = score_dict.get("z_score")
+             if z_score is None:
+                 z_score = self._compute_z_score(green_token_count, num_tokens_scored)
+             score_dict.update(dict(p_value=self._compute_p_value(z_score)))
+         if return_green_token_mask:
+             score_dict.update(dict(green_token_mask=green_token_mask))
+
+         return score_dict
+
+     def detect(
+         self,
+         text: str = None,
+         tokenized_text: list[int] = None,
+         return_prediction: bool = True,
+         return_scores: bool = True,
+         z_threshold: float = None,
+         **kwargs,
+     ) -> dict:
+
+         assert (text is not None) ^ (tokenized_text is not None), "Must pass either the raw or tokenized string"
+         if return_prediction:
+             kwargs["return_p_value"] = True  # to return the "confidence":=1-p of positive detections
+
+         # run optional normalizers on text
+         for normalizer in self.normalizers:
+             text = normalizer(text)
+         if len(self.normalizers) > 0:
+             print(f"Text after normalization:\n\n{text}\n")
+
+         if tokenized_text is None:
+             assert self.tokenizer is not None, (
+                 "Watermark detection on raw string "
+                 "requires an instance of the tokenizer "
+                 "that was used at generation time."
+             )
+             tokenized_text = self.tokenizer(text, return_tensors="pt", add_special_tokens=False)["input_ids"][0].to(self.device)
+             if tokenized_text[0] == self.tokenizer.bos_token_id:
+                 tokenized_text = tokenized_text[1:]
+         else:
+             # try to remove the bos_tok at beginning if it's there
+             if (self.tokenizer is not None) and (tokenized_text[0] == self.tokenizer.bos_token_id):
+                 tokenized_text = tokenized_text[1:]
+
+         # call score method
+         output_dict = {}
+         score_dict = self._score_sequence(tokenized_text, **kwargs)
+         if return_scores:
+             output_dict.update(score_dict)
+         # if passed return_prediction then perform the hypothesis test and return the outcome
+         if return_prediction:
+             z_threshold = z_threshold if z_threshold else self.z_threshold
+             assert z_threshold is not None, "Need a threshold in order to decide outcome of detection test"
+             output_dict["prediction"] = score_dict["z_score"] > z_threshold
+             if output_dict["prediction"]:
+                 output_dict["confidence"] = 1 - score_dict["p_value"]
+
+         return output_dict
+
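
Taken together, a minimal end-to-end sketch of how these pieces are wired up (the model name, prompt, and parameter values are illustrative; generation goes through the standard transformers API, as app.py does above):

    from transformers import AutoTokenizer, AutoModelForCausalLM, LogitsProcessorList
    from watermark_processor import WatermarkLogitsProcessor, WatermarkDetector

    tokenizer = AutoTokenizer.from_pretrained("gpt2")  # illustrative stand-in model
    model = AutoModelForCausalLM.from_pretrained("gpt2")
    vocab = list(tokenizer.get_vocab().values())

    watermark_processor = WatermarkLogitsProcessor(
        vocab=vocab, gamma=0.5, delta=2.0, seeding_scheme="simple_1")

    inputs = tokenizer("An illustrative demo prompt", return_tensors="pt")
    output_ids = model.generate(
        **inputs, max_new_tokens=200, do_sample=True, temperature=0.7,
        logits_processor=LogitsProcessorList([watermark_processor]))

    detector = WatermarkDetector(
        vocab=vocab, gamma=0.5, seeding_scheme="simple_1",
        device=model.device, tokenizer=tokenizer,
        z_threshold=4.0, normalizers=["unicode"])
    # score only the newly generated tokens, not the prompt
    new_tokens = output_ids[0][inputs["input_ids"].shape[-1]:]
    result = detector.detect(tokenizer.decode(new_tokens, skip_special_tokens=True))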