lm-watermarking

Runtime error

App Files Files Community

jwkirchenbauer committed on Mar 10, 2023

Commit

507fd5a

1 Parent(s): a7d76f1

added inference api functionality

Browse files

Files changed (2) hide show

demo_watermark.py +178 -46
requirements.txt +2 -1

demo_watermark.py CHANGED Viewed

@@ -32,6 +32,14 @@ from transformers import (AutoTokenizer,
 from watermark_processor import WatermarkLogitsProcessor, WatermarkDetector
 def str2bool(v):
     """Util function for user friendly boolean flag args"""
     if isinstance(v, bool):
@@ -200,13 +208,69 @@ def load_model(args):
     return model, tokenizer, device
-def generate(prompt, args, model=None, device=None, tokenizer=None):
     """Instatiate the WatermarkLogitsProcessor according to the watermark parameters
        and generate watermarked text by passing it to the generate method of the model
        as a logits processor. """
     print(f"Generating with {args}")
     watermark_processor = WatermarkLogitsProcessor(vocab=list(tokenizer.get_vocab().values()),
                                                     gamma=args.gamma,
                                                     delta=args.delta,
@@ -235,16 +299,6 @@ def generate(prompt, args, model=None, device=None, tokenizer=None):
         logits_processor=LogitsProcessorList([watermark_processor]),
         **gen_kwargs
     )
-    if args.prompt_max_length:
-        pass
-    elif hasattr(model.config,"max_position_embedding"):
-        args.prompt_max_length = model.config.max_position_embeddings-args.max_new_tokens
-    else:
-        args.prompt_max_length = 2048-args.max_new_tokens
-    tokd_input = tokenizer(prompt, return_tensors="pt", add_special_tokens=True, truncation=True, max_length=args.prompt_max_length).to(device)
-    truncation_warning = True if tokd_input["input_ids"].shape[-1] == args.prompt_max_length else False
-    redecoded_input = tokenizer.batch_decode(tokd_input["input_ids"], skip_special_tokens=True)[0]
     torch.manual_seed(args.generation_seed)
     output_without_watermark = generate_without_watermark(**tokd_input)
@@ -266,8 +320,9 @@ def generate(prompt, args, model=None, device=None, tokenizer=None):
             int(truncation_warning),
             decoded_output_without_watermark,
             decoded_output_with_watermark,
-            args)
-            # decoded_output_with_watermark)
 def format_names(s):
     """Format names for the gradio demo interface"""
@@ -301,9 +356,12 @@ def list_format_scores(score_dict, detection_threshold):
         lst_2d.insert(-1,["z-score Threshold", f"{detection_threshold}"])
     return lst_2d
-def detect(input_text, args, device=None, tokenizer=None):
     """Instantiate the WatermarkDetection object and call detect on
         the input text returning the scores and outcome of the test"""
     watermark_detector = WatermarkDetector(vocab=list(tokenizer.get_vocab().values()),
                                         gamma=args.gamma,
                                         seeding_scheme=args.seeding_scheme,
@@ -313,20 +371,29 @@ def detect(input_text, args, device=None, tokenizer=None):
                                         normalizers=args.normalizers,
                                         ignore_repeated_bigrams=args.ignore_repeated_bigrams,
                                         select_green_tokens=args.select_green_tokens)
-    if len(input_text)-1 > watermark_detector.min_prefix_len:
-        score_dict = watermark_detector.detect(input_text)
-        # output = str_format_scores(score_dict, watermark_detector.z_threshold)
-        output = list_format_scores(score_dict, watermark_detector.z_threshold)
     else:
-        # output = (f"Error: string not long enough to compute watermark presence.")
         output = [["Error","string too short to compute metrics"]]
         output += [["",""] for _ in range(6)]
-    return output, args
 def run_gradio(args, model=None, device=None, tokenizer=None):
     """Define and launch the gradio demo interface"""
-    generate_partial = partial(generate, model=model, device=device, tokenizer=tokenizer)
-    detect_partial = partial(detect, device=device, tokenizer=tokenizer)
     with gr.Blocks() as demo:
         # Top section, greeting and instructions
@@ -343,11 +410,20 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
                 [![](https://badgen.net/badge/icon/GitHub?icon=github&label)](https://github.com/jwkirchenbauer/lm-watermarking)
                 """
                 )
-                gr.Markdown(f"Language model: {args.model_name_or_path} {'(float16 mode)' if args.load_fp16 else ''}")
         # Construct state for parameters, define updates and toggles
         default_prompt = args.__dict__.pop("default_prompt")
         session_args = gr.State(value=args)
         with gr.Tab("Welcome"):
             with gr.Row():
@@ -448,7 +524,7 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
                     with gr.Row():
                         generation_seed = gr.Number(label="Generation Seed",value=args.generation_seed, interactive=True)
                     with gr.Row():
-                        n_beams = gr.Dropdown(label="Number of Beams",choices=list(range(1,11,1)), value=args.n_beams, visible=(not args.use_sampling))
                     with gr.Row():
                         max_new_tokens = gr.Slider(label="Max Generated Tokens", minimum=10, maximum=1000, step=10, value=args.max_new_tokens)
@@ -561,18 +637,19 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
                 """)
         # Register main generation tab click, outputing generations as well as a the encoded+redecoded+potentially truncated prompt and flag
-        generate_btn.click(fn=generate_partial, inputs=[prompt,session_args], outputs=[redecoded_input, truncation_warning, output_without_watermark, output_with_watermark,session_args])
         # Show truncated version of prompt if truncation occurred
         redecoded_input.change(fn=truncate_prompt, inputs=[redecoded_input,truncation_warning,prompt,session_args], outputs=[prompt,session_args])
         # Call detection when the outputs (of the generate function) are updated
-        output_without_watermark.change(fn=detect_partial, inputs=[output_without_watermark,session_args], outputs=[without_watermark_detection_result,session_args])
-        output_with_watermark.change(fn=detect_partial, inputs=[output_with_watermark,session_args], outputs=[with_watermark_detection_result,session_args])
         # Register main detection tab click
-        # detect_btn.click(fn=detect_partial, inputs=[detection_input,session_args], outputs=[detection_result, session_args])
-        detect_btn.click(fn=detect_partial, inputs=[detection_input,session_args], outputs=[detection_result, session_args], api_name="detection")
         # State management logic
         # update callbacks that change the state dict
         def update_sampling_temp(session_state, value): session_state.sampling_temp = float(value); return session_state
         def update_generation_seed(session_state, value): session_state.generation_seed = int(value); return session_state
         def update_gamma(session_state, value): session_state.gamma = float(value); return session_state
@@ -594,17 +671,56 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
                 return gr.update(visible=False)
             elif value == "greedy":
                 return gr.update(visible=True)
         def update_n_beams(session_state, value): session_state.n_beams = value; return session_state
         def update_max_new_tokens(session_state, value): session_state.max_new_tokens = int(value); return session_state
         def update_ignore_repeated_bigrams(session_state, value): session_state.ignore_repeated_bigrams = value; return session_state
         def update_normalizers(session_state, value): session_state.normalizers = value; return session_state
         def update_seed_separately(session_state, value): session_state.seed_separately = value; return session_state
         def update_select_green_tokens(session_state, value): session_state.select_green_tokens = value; return session_state
-        # registering callbacks for toggling the visibilty of certain parameters
         decoding.change(toggle_sampling_vis,inputs=[decoding], outputs=[sampling_temp])
         decoding.change(toggle_sampling_vis,inputs=[decoding], outputs=[generation_seed])
         decoding.change(toggle_sampling_vis_inv,inputs=[decoding], outputs=[n_beams])
         # registering all state update callbacks
         decoding.change(update_decoding,inputs=[session_args, decoding], outputs=[session_args])
         sampling_temp.change(update_sampling_temp,inputs=[session_args, sampling_temp], outputs=[session_args])
         generation_seed.change(update_generation_seed,inputs=[session_args, generation_seed], outputs=[session_args])
@@ -620,27 +736,29 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
         # register additional callback on button clicks that updates the shown parameters window
         generate_btn.click(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
         detect_btn.click(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
         # When the parameters change, display the update and fire detection, since some detection params dont change the model output.
         gamma.change(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
-        gamma.change(fn=detect_partial, inputs=[output_without_watermark,session_args], outputs=[without_watermark_detection_result,session_args])
-        gamma.change(fn=detect_partial, inputs=[output_with_watermark,session_args], outputs=[with_watermark_detection_result,session_args])
-        gamma.change(fn=detect_partial, inputs=[detection_input,session_args], outputs=[detection_result,session_args])
         detection_z_threshold.change(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
-        detection_z_threshold.change(fn=detect_partial, inputs=[output_without_watermark,session_args], outputs=[without_watermark_detection_result,session_args])
-        detection_z_threshold.change(fn=detect_partial, inputs=[output_with_watermark,session_args], outputs=[with_watermark_detection_result,session_args])
-        detection_z_threshold.change(fn=detect_partial, inputs=[detection_input,session_args], outputs=[detection_result,session_args])
         ignore_repeated_bigrams.change(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
-        ignore_repeated_bigrams.change(fn=detect_partial, inputs=[output_without_watermark,session_args], outputs=[without_watermark_detection_result,session_args])
-        ignore_repeated_bigrams.change(fn=detect_partial, inputs=[output_with_watermark,session_args], outputs=[with_watermark_detection_result,session_args])
-        ignore_repeated_bigrams.change(fn=detect_partial, inputs=[detection_input,session_args], outputs=[detection_result,session_args])
         normalizers.change(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
-        normalizers.change(fn=detect_partial, inputs=[output_without_watermark,session_args], outputs=[without_watermark_detection_result,session_args])
-        normalizers.change(fn=detect_partial, inputs=[output_with_watermark,session_args], outputs=[with_watermark_detection_result,session_args])
-        normalizers.change(fn=detect_partial, inputs=[detection_input,session_args], outputs=[detection_result,session_args])
         select_green_tokens.change(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
-        select_green_tokens.change(fn=detect_partial, inputs=[output_without_watermark,session_args], outputs=[without_watermark_detection_result,session_args])
-        select_green_tokens.change(fn=detect_partial, inputs=[output_with_watermark,session_args], outputs=[with_watermark_detection_result,session_args])
-        select_green_tokens.change(fn=detect_partial, inputs=[detection_input,session_args], outputs=[detection_result,session_args])
     demo.queue(concurrency_count=3)
@@ -691,9 +809,23 @@ def main(args):
         "on their body and head. The diamondback terrapin has large webbed "
         "feet.[9] The species is"
     )
     args.default_prompt = input_text
     # Generate and detect, report to stdout
     if not args.skip_model_load:
@@ -702,7 +834,7 @@ def main(args):
         print("Prompt:")
         print(input_text)
-        _, _, decoded_output_without_watermark, decoded_output_with_watermark, _ = generate(input_text,
                                                                                             args,
                                                                                             model=model,
                                                                                             device=device,

 from watermark_processor import WatermarkLogitsProcessor, WatermarkDetector
+# FIXME correct lengths for all models (every entry below currently uses "max_length": 2048)
+API_MODEL_MAP = {  # model names routed to generate_with_api -> {"max_length": base for the prompt token limit, "gamma"/"delta": values pushed into the UI controls}
+    "bigscience/bloomz"       : {"max_length": 2048, "gamma": 0.5, "delta": 2.0},
+    "google/flan-ul2"         : {"max_length": 2048, "gamma": 0.5, "delta": 2.0},
+    "google/flan-t5-xxl"      : {"max_length": 2048, "gamma": 0.5, "delta": 2.0},
+    "EleutherAI/gpt-neox-20b" : {"max_length": 2048, "gamma": 0.5, "delta": 2.0},
+}
 def str2bool(v):
     """Util function for user friendly boolean flag args"""
     if isinstance(v, bool):
     return model, tokenizer, device
+from text_generation import InferenceAPIClient
+def generate_with_api(prompt, args):
+    hf_api_key = os.environ.get("HF_API_KEY")
+    if hf_api_key is None:
+        raise ValueError("HF_API_KEY environment variable not set, cannot use HF API to generate text.")
+    client = InferenceAPIClient(args.model_name_or_path, token=hf_api_key)
+    assert args.n_beams == 1, "HF API models do not support beam search."
+    generation_params = {
+            "max_new_tokens": args.max_new_tokens,
+            "do_sample": args.use_sampling,
+        }
+    if args.use_sampling:
+        generation_params["temperature"] = args.sampling_temp
+        generation_params["seed"] = args.generation_seed
+    generation_params["watermarking"] = False
+    output = client.generate(prompt, **generation_params)
+    output_text_without_watermark = output.generated_text
+    generation_params["watermarking"] = True
+    output = client.generate(prompt, **generation_params)
+    output_text_with_watermark = output.generated_text
+    return (output_text_without_watermark,
+            output_text_with_watermark)
+def generate(prompt, args, tokenizer, model=None, device=None):
     """Instatiate the WatermarkLogitsProcessor according to the watermark parameters
        and generate watermarked text by passing it to the generate method of the model
        as a logits processor. """
     print(f"Generating with {args}")
+    # This applies to both the local and API model scenarios
+    if args.prompt_max_length:
+        pass
+    elif args.model_name_or_path in API_MODEL_MAP:
+        args.prompt_max_length = API_MODEL_MAP[args.model_name_or_path]["max_length"]-args.max_new_tokens
+    elif hasattr(model.config,"max_position_embedding"):
+        args.prompt_max_length = model.config.max_position_embeddings-args.max_new_tokens
+    else:
+        args.prompt_max_length = 2048-args.max_new_tokens
+    tokd_input = tokenizer(prompt, return_tensors="pt", add_special_tokens=True, truncation=True, max_length=args.prompt_max_length).to(device)
+    truncation_warning = True if tokd_input["input_ids"].shape[-1] == args.prompt_max_length else False
+    redecoded_input = tokenizer.batch_decode(tokd_input["input_ids"], skip_special_tokens=True)[0]
+    if args.model_name_or_path in API_MODEL_MAP:
+        api_outputs = generate_with_api(prompt, args)
+        decoded_output_without_watermark = api_outputs[0]
+        decoded_output_with_watermark = api_outputs[1]
+        return (redecoded_input,
+                int(truncation_warning),
+                decoded_output_without_watermark,
+                decoded_output_with_watermark,
+                args,
+                tokenizer)
     watermark_processor = WatermarkLogitsProcessor(vocab=list(tokenizer.get_vocab().values()),
                                                     gamma=args.gamma,
                                                     delta=args.delta,
         logits_processor=LogitsProcessorList([watermark_processor]),
         **gen_kwargs
     )
     torch.manual_seed(args.generation_seed)
     output_without_watermark = generate_without_watermark(**tokd_input)
             int(truncation_warning),
             decoded_output_without_watermark,
             decoded_output_with_watermark,
+            args,
+            tokenizer)
 def format_names(s):
     """Format names for the gradio demo interface"""
         lst_2d.insert(-1,["z-score Threshold", f"{detection_threshold}"])
     return lst_2d
+def detect(input_text, args, tokenizer, device=None):
     """Instantiate the WatermarkDetection object and call detect on
         the input text returning the scores and outcome of the test"""
+    print(f"Detecting with {args}")
+    print(f"Detection Tokenizer: {type(tokenizer)}")
     watermark_detector = WatermarkDetector(vocab=list(tokenizer.get_vocab().values()),
                                         gamma=args.gamma,
                                         seeding_scheme=args.seeding_scheme,
                                         normalizers=args.normalizers,
                                         ignore_repeated_bigrams=args.ignore_repeated_bigrams,
                                         select_green_tokens=args.select_green_tokens)
+    # if len(input_text)-1 > watermark_detector.min_prefix_len:
+    error = False
+    if input_text == "":
+        error = True
     else:
+        try:
+            score_dict = watermark_detector.detect(input_text)
+            # output = str_format_scores(score_dict, watermark_detector.z_threshold)
+            output = list_format_scores(score_dict, watermark_detector.z_threshold)
+        except ValueError as e:
+            print(e)
+            error = True
+    if error:
         output = [["Error","string too short to compute metrics"]]
         output += [["",""] for _ in range(6)]
+    return output, args, tokenizer
 def run_gradio(args, model=None, device=None, tokenizer=None):
     """Define and launch the gradio demo interface"""
+    # generate_partial = partial(generate, model=model, device=device, tokenizer=tokenizer)
+    # detect_partial = partial(detect, device=device, tokenizer=tokenizer)
+    generate_partial = partial(generate, model=model, device=device)
+    detect_partial = partial(detect, device=device)
     with gr.Blocks() as demo:
         # Top section, greeting and instructions
                 [![](https://badgen.net/badge/icon/GitHub?icon=github&label)](https://github.com/jwkirchenbauer/lm-watermarking)
                 """
                 )
+                # gr.Markdown(f"Language model: {args.model_name_or_path} {'(float16 mode)' if args.load_fp16 else ''}")
+                # if model_name_or_path at startup not one of the API models then add to dropdown
+                all_models = sorted(list(set(list(API_MODEL_MAP.keys())+[args.model_name_or_path])))
+                model_selector = gr.Dropdown(
+                    all_models,
+                    value=args.model_name_or_path,
+                    label="Language Model",
+                )
         # Construct state for parameters, define updates and toggles
         default_prompt = args.__dict__.pop("default_prompt")
         session_args = gr.State(value=args)
+        # note that state obj automatically calls value if it's a callable, want to avoid calling tokenizer at startup
+        session_tokenizer = gr.State(value=lambda : tokenizer)
         with gr.Tab("Welcome"):
             with gr.Row():
                     with gr.Row():
                         generation_seed = gr.Number(label="Generation Seed",value=args.generation_seed, interactive=True)
                     with gr.Row():
+                        n_beams = gr.Dropdown(label="Number of Beams",choices=list(range(1,11,1)), value=args.n_beams, visible=((not args.use_sampling) and (not args.model_name_or_path in API_MODEL_MAP)))
                     with gr.Row():
                         max_new_tokens = gr.Slider(label="Max Generated Tokens", minimum=10, maximum=1000, step=10, value=args.max_new_tokens)
                 """)
         # Register main generation tab click, outputting the generations as well as the encoded+redecoded (potentially truncated) prompt and the truncation flag
+        generate_btn.click(fn=generate_partial, inputs=[prompt,session_args,session_tokenizer], outputs=[redecoded_input, truncation_warning, output_without_watermark, output_with_watermark,session_args,session_tokenizer])
         # Show truncated version of prompt if truncation occurred
         redecoded_input.change(fn=truncate_prompt, inputs=[redecoded_input,truncation_warning,prompt,session_args], outputs=[prompt,session_args])
         # Call detection when the outputs (of the generate function) are updated
+        output_without_watermark.change(fn=detect_partial, inputs=[output_without_watermark,session_args,session_tokenizer], outputs=[without_watermark_detection_result,session_args,session_tokenizer])
+        output_with_watermark.change(fn=detect_partial, inputs=[output_with_watermark,session_args,session_tokenizer], outputs=[with_watermark_detection_result,session_args,session_tokenizer])
         # Register main detection tab click
+        # detect_btn.click(fn=detect_partial, inputs=[detection_input,session_args,session_tokenizer], outputs=[detection_result, session_args,session_tokenizer])
+        detect_btn.click(fn=detect_partial, inputs=[detection_input,session_args,session_tokenizer], outputs=[detection_result, session_args,session_tokenizer], api_name="detection")
         # State management logic
         # update callbacks that change the state dict
+        def update_model(session_state, value): session_state.model_name_or_path = value; return session_state
         def update_sampling_temp(session_state, value):
             """Store `value` as a float sampling temperature on the session state and return that state."""
             temperature = float(value)
             session_state.sampling_temp = temperature
             return session_state
         def update_generation_seed(session_state, value):
             """Store `value` as an int generation seed on the session state and return that state."""
             seed = int(value)
             session_state.generation_seed = seed
             return session_state
         def update_gamma(session_state, value):
             """Store `value` as a float gamma on the session state and return that state."""
             gamma_value = float(value)
             session_state.gamma = gamma_value
             return session_state
                 return gr.update(visible=False)
             elif value == "greedy":
                 return gr.update(visible=True)
+        # if model name is in the list of api models, set the num beams parameter to 1 and hide n_beams
+        def toggle_vis_for_api_model(value):
+            if value in API_MODEL_MAP:
+                return gr.update(visible=False)
+            else:
+                return gr.update(visible=True)
+        def toggle_beams_for_api_model(value, orig_n_beams):
+            if value in API_MODEL_MAP:
+                return gr.update(value=1)
+            else:
+                return gr.update(value=orig_n_beams)
+        # if model name is in the list of api models, set the interactive parameter to false
+        def toggle_interactive_for_api_model(value):
+            if value in API_MODEL_MAP:
+                return gr.update(interactive=False)
+            else:
+                return gr.update(interactive=True)
+        # if model name is in the list of api models, set gamma and delta based on API map
+        def toggle_gamma_for_api_model(value, orig_gamma):
+            if value in API_MODEL_MAP:
+                return gr.update(value=API_MODEL_MAP[value]["gamma"])
+            else:
+                return gr.update(value=orig_gamma)
+        def toggle_delta_for_api_model(value, orig_delta):
+            if value in API_MODEL_MAP:
+                return gr.update(value=API_MODEL_MAP[value]["delta"])
+            else:
+                return gr.update(value=orig_delta)
         def update_n_beams(session_state, value):
             """Store `value` unchanged as the beam count on the session state and return that state."""
             session_state.n_beams = value
             return session_state
         def update_max_new_tokens(session_state, value):
             """Store `value` as an int token limit on the session state and return that state."""
             token_limit = int(value)
             session_state.max_new_tokens = token_limit
             return session_state
         def update_ignore_repeated_bigrams(session_state, value):
             """Store `value` unchanged as `ignore_repeated_bigrams` on the session state and return that state."""
             session_state.ignore_repeated_bigrams = value
             return session_state
         def update_normalizers(session_state, value):
             """Store `value` unchanged as `normalizers` on the session state and return that state."""
             session_state.normalizers = value
             return session_state
         def update_seed_separately(session_state, value):
             """Store `value` unchanged as `seed_separately` on the session state and return that state."""
             session_state.seed_separately = value
             return session_state
         def update_select_green_tokens(session_state, value):
             """Store `value` unchanged as `select_green_tokens` on the session state and return that state."""
             session_state.select_green_tokens = value
             return session_state
+        def update_tokenizer(model_name_or_path): return AutoTokenizer.from_pretrained(model_name_or_path)
+        # registering callbacks for toggling the visibility of certain parameters based on the values of others
         decoding.change(toggle_sampling_vis,inputs=[decoding], outputs=[sampling_temp])
         decoding.change(toggle_sampling_vis,inputs=[decoding], outputs=[generation_seed])
         decoding.change(toggle_sampling_vis_inv,inputs=[decoding], outputs=[n_beams])
+        model_selector.change(toggle_vis_for_api_model,inputs=[model_selector], outputs=[n_beams])
+        decoding.change(toggle_vis_for_api_model,inputs=[model_selector], outputs=[n_beams])
+        model_selector.change(toggle_beams_for_api_model,inputs=[model_selector,n_beams], outputs=[n_beams])
+        model_selector.change(toggle_interactive_for_api_model,inputs=[model_selector], outputs=[gamma])
+        model_selector.change(toggle_interactive_for_api_model,inputs=[model_selector], outputs=[delta])
+        model_selector.change(toggle_gamma_for_api_model,inputs=[model_selector,gamma], outputs=[gamma])
+        model_selector.change(toggle_delta_for_api_model,inputs=[model_selector,delta], outputs=[delta])
+        model_selector.change(update_tokenizer,inputs=[model_selector], outputs=[session_tokenizer])
         # registering all state update callbacks
+        model_selector.change(update_model,inputs=[session_args, model_selector], outputs=[session_args])
         decoding.change(update_decoding,inputs=[session_args, decoding], outputs=[session_args])
         sampling_temp.change(update_sampling_temp,inputs=[session_args, sampling_temp], outputs=[session_args])
         generation_seed.change(update_generation_seed,inputs=[session_args, generation_seed], outputs=[session_args])
         # register additional callback on button clicks that updates the shown parameters window
         generate_btn.click(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
         detect_btn.click(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
+        model_selector.change(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
         # When the parameters change, display the update and fire detection, since some detection params don't change the model output.
+        delta.change(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
         gamma.change(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
+        gamma.change(fn=detect_partial, inputs=[output_without_watermark,session_args,session_tokenizer], outputs=[without_watermark_detection_result,session_args,session_tokenizer])
+        gamma.change(fn=detect_partial, inputs=[output_with_watermark,session_args,session_tokenizer], outputs=[with_watermark_detection_result,session_args,session_tokenizer])
+        gamma.change(fn=detect_partial, inputs=[detection_input,session_args,session_tokenizer], outputs=[detection_result,session_args,session_tokenizer])
         detection_z_threshold.change(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
+        detection_z_threshold.change(fn=detect_partial, inputs=[output_without_watermark,session_args,session_tokenizer], outputs=[without_watermark_detection_result,session_args,session_tokenizer])
+        detection_z_threshold.change(fn=detect_partial, inputs=[output_with_watermark,session_args,session_tokenizer], outputs=[with_watermark_detection_result,session_args,session_tokenizer])
+        detection_z_threshold.change(fn=detect_partial, inputs=[detection_input,session_args,session_tokenizer], outputs=[detection_result,session_args,session_tokenizer])
         ignore_repeated_bigrams.change(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
+        ignore_repeated_bigrams.change(fn=detect_partial, inputs=[output_without_watermark,session_args,session_tokenizer], outputs=[without_watermark_detection_result,session_args,session_tokenizer])
+        ignore_repeated_bigrams.change(fn=detect_partial, inputs=[output_with_watermark,session_args,session_tokenizer], outputs=[with_watermark_detection_result,session_args,session_tokenizer])
+        ignore_repeated_bigrams.change(fn=detect_partial, inputs=[detection_input,session_args,session_tokenizer], outputs=[detection_result,session_args,session_tokenizer])
         normalizers.change(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
+        normalizers.change(fn=detect_partial, inputs=[output_without_watermark,session_args,session_tokenizer], outputs=[without_watermark_detection_result,session_args,session_tokenizer])
+        normalizers.change(fn=detect_partial, inputs=[output_with_watermark,session_args,session_tokenizer], outputs=[with_watermark_detection_result,session_args,session_tokenizer])
+        normalizers.change(fn=detect_partial, inputs=[detection_input,session_args,session_tokenizer], outputs=[detection_result,session_args,session_tokenizer])
         select_green_tokens.change(lambda value: str(value), inputs=[session_args], outputs=[current_parameters])
+        select_green_tokens.change(fn=detect_partial, inputs=[output_without_watermark,session_args,session_tokenizer], outputs=[without_watermark_detection_result,session_args,session_tokenizer])
+        select_green_tokens.change(fn=detect_partial, inputs=[output_with_watermark,session_args,session_tokenizer], outputs=[with_watermark_detection_result,session_args,session_tokenizer])
+        select_green_tokens.change(fn=detect_partial, inputs=[detection_input,session_args,session_tokenizer], outputs=[detection_result,session_args,session_tokenizer])
     demo.queue(concurrency_count=3)
         "on their body and head. The diamondback terrapin has large webbed "
         "feet.[9] The species is"
     )
+    # teaser example
+    # input_text = (
+    #     "In this work, we study watermarking of language model output. "
+    #     "A watermark is a hidden pattern in text that is imperceptible to humans, "
+    #     "while making the text algorithmically identifiable as synthetic. "
+    #     "We propose an efficient watermark that makes synthetic text detectable "
+    #     "from short spans of tokens (as few as 25 words), while false-positives "
+    #     "(where human text is marked as machine-generated) are statistically improbable. "
+    #     "The watermark detection algorithm can be made public, enabling third parties "
+    #     "(e.g., social media platforms) to run it themselves, or it can be kept private "
+    #     "and run behind an API.  We seek a watermark with the following properties:\n"
+    # )
     args.default_prompt = input_text
     # Generate and detect, report to stdout
     if not args.skip_model_load:
         print("Prompt:")
         print(input_text)
+        _, _, decoded_output_without_watermark, decoded_output_with_watermark, _, _ = generate(input_text,
                                                                                             args,
                                                                                             model=model,
                                                                                             device=device,

requirements.txt CHANGED Viewed

@@ -5,4 +5,5 @@ scipy
 torch
 transformers
 tokenizers
-accelerate

 torch
 transformers
 tokenizers
+accelerate
+text-generation