jwkirchenbauer committed
Commit a7d76f1
1 Parent(s): a230391

Added a welcome, paper tldr tab

Files changed (2):
  1. app.py +1 -1
  2. demo_watermark.py +90 -45
app.py CHANGED
@@ -35,7 +35,7 @@ arg_dict = {
     'sampling_temp': 0.7,
     'use_gpu': True,
     'seeding_scheme': 'simple_1',
-    'gamma': 0.25,
+    'gamma': 0.5,
     'delta': 2.0,
     'normalizers': '',
     'ignore_repeated_bigrams': False,
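The `gamma` change above raises the greenlist fraction from a quarter to half of the vocabulary. Under the no-watermark null hypothesis, a fraction `gamma` of scored tokens is expected to be green, so the same observed green count yields very different evidence depending on `gamma`. A minimal sketch of the one-proportion z-test the detector is built on (the `z_score` helper is illustrative, not code from this repo):

```python
from math import sqrt

def z_score(green_count: int, total: int, gamma: float) -> float:
    """One-proportion z-test: observed green fraction vs. the null rate gamma."""
    expected = gamma * total                 # green tokens expected by chance
    std = sqrt(total * gamma * (1 - gamma))  # binomial standard deviation
    return (green_count - expected) / std

# Suppose 120 of 200 scored tokens land in their greenlists:
print(round(z_score(120, 200, gamma=0.25), 2))  # 11.43 -- overwhelming evidence
print(round(z_score(120, 200, gamma=0.5), 2))   # 2.83  -- much weaker at the new null rate
```

Raising `gamma` raises the null rate, so the same green count carries less evidence; more scored tokens or a stronger greenlist bias are then needed to reach a given z-score.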
demo_watermark.py CHANGED
@@ -343,49 +343,63 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
                 [![](https://badgen.net/badge/icon/GitHub?icon=github&label)](https://github.com/jwkirchenbauer/lm-watermarking)
                 """
                 )
-        # with gr.Column(scale=2):
-        #     pass
-        # ![visitor badge](https://visitor-badge.glitch.me/badge?page_id=tomg-group-umd_lm-watermarking) # buggy
-
-        with gr.Accordion("Understanding the output metrics",open=False):
-            gr.Markdown(
-            """
-            - `z-score threshold` : The cuttoff for the hypothesis test
-            - `Tokens Counted (T)` : The number of tokens in the output that were counted by the detection algorithm.
-                The first token is ommitted in the simple, single token seeding scheme since there is no way to generate
-                a greenlist for it as it has no prefix token(s). Under the "Ignore Bigram Repeats" detection algorithm,
-                described in the bottom panel, this can be much less than the total number of tokens generated if there is a lot of repetition.
-            - `# Tokens in Greenlist` : The number of tokens that were observed to fall in their respective greenlist
-            - `Fraction of T in Greenlist` : The `# Tokens in Greenlist` / `T`. This is expected to be approximately `gamma` for human/unwatermarked text.
-            - `z-score` : The test statistic for the detection hypothesis test. If larger than the `z-score threshold`
-                we "reject the null hypothesis" that the text is human/unwatermarked, and conclude it is watermarked
-            - `p value` : The likelihood of observing the computed `z-score` under the null hypothesis. This is the likelihood of
-                observing the `Fraction of T in Greenlist` given that the text was generated without knowledge of the watermark procedure/greenlists.
-                If this is extremely _small_ we are confident that this many green tokens was not chosen by random chance.
-            - `prediction` : The outcome of the hypothesis test - whether the observed `z-score` was higher than the `z-score threshold`
-            - `confidence` : If we reject the null hypothesis, and the `prediction` is "Watermarked", then we report 1-`p value` to represent
-                the confidence of the detection based on the unlikeliness of this `z-score` observation.
-            """
-            )
-
-        with gr.Accordion("A note on model capability",open=True):
-            gr.Markdown(
-            """
-            This demo uses open-source language models that fit on a single GPU. These models are less powerful than proprietary commercial tools like ChatGPT, Claude, or Bard.
-
-            Importantly, we use a language model that is designed to "complete" your prompt, and not a model this is fine-tuned to follow instructions.
-            For best results, prompt the model with a few sentences that form the beginning of a paragraph, and then allow it to "continue" your paragraph.
-            Some examples include the opening paragraph of a wikipedia article, or the first few sentences of a story.
-            Longer prompts that end mid-sentence will result in more fluent generations.
-            """
-            )
-        gr.Markdown(f"Language model: {args.model_name_or_path} {'(float16 mode)' if args.load_fp16 else ''}")
+        gr.Markdown(f"Language model: {args.model_name_or_path} {'(float16 mode)' if args.load_fp16 else ''}")

         # Construct state for parameters, define updates and toggles
         default_prompt = args.__dict__.pop("default_prompt")
         session_args = gr.State(value=args)

-        with gr.Tab("Generate and Detect"):
+        with gr.Tab("Welcome"):
+            with gr.Row():
+                with gr.Column(scale=2):
+                    gr.Markdown(
+                    """
+                    Potential harms of large language models can be mitigated by *watermarking* a model's output.
+                    *Watermarks* are embedded signals in the generated text that are invisible to humans but algorithmically
+                    detectable, allowing *anyone* to later check whether a given span of text
+                    was likely to have been generated by a model that uses the watermark.
+
+                    This space showcases a watermarking approach that can be applied to _any_ generative language model.
+                    For demonstration purposes, the space serves a "small" multi-billion parameter model (see the note below for caveats due to its small size).
+                    """
+                    )
+                    with gr.Accordion("A note on model generation quality",open=False):
+                        gr.Markdown(
+                        """
+                        This demo uses open-source language models that fit on a single GPU. These models are less powerful than proprietary commercial tools like ChatGPT, Claude, or Bard.
+
+                        Importantly, we use a language model that is designed to "complete" your prompt, not a model that is fine-tuned to follow instructions.
+                        For best results, prompt the model with a few sentences that form the beginning of a paragraph, and then allow it to "continue" your paragraph.
+                        Some examples include the opening paragraph of a Wikipedia article, or the first few sentences of a story.
+                        Longer prompts that end mid-sentence will result in more fluent generations.
+                        """
+                        )
+                    gr.Markdown(
+                    """
+                    **[Generate & Detect]**: The first tab shows that the watermark can be embedded with
+                    negligible impact on text quality. You can try any prompt and compare the quality of
+                    normal text (*Output Without Watermark*) to the watermarked text (*Output With Watermark*) below it.
+                    Metrics on the right show that the watermark can be reliably detected.
+                    Detection is very efficient and does not use the language model or its parameters.
+
+                    **[Detector Only]**: You can also copy-paste the watermarked text (or any other text)
+                    into the second tab. This can be used to see how many sentences you could remove and still detect the watermark.
+                    You can also verify here that the detection has, by design, a low false-positive rate;
+                    this means that human-generated text that you copy into this detector will not be marked as machine-generated.
+
+                    You can find more details on how this watermark functions in our [ArXiv preprint](https://arxiv.org/abs/2301.10226).
+                    """
+                    )
+
+                with gr.Column(scale=1):
+                    gr.Markdown(
+                    """
+                    ![](https://drive.google.com/uc?export=view&id=1yVLPcjm-xvaCjQyc3FGLsWIU84v1QRoC)
+                    """
+                    )
+
+
+        with gr.Tab("Generate & Detect"):

             with gr.Row():
                 prompt = gr.Textbox(label=f"Prompt", interactive=True,lines=10,max_lines=10, value=default_prompt)
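The tab restructuring above relies on Gradio's per-session state: `args` is wrapped in `gr.State(value=args)`, and the resulting `session_args` is passed into, and returned from, every callback so each visitor gets an independent copy. A minimal, self-contained sketch of that pattern (names hypothetical, not this app's code):

```python
import gradio as gr

def detect_stub(text, state):
    # Callbacks receive the session state and must return it as an output
    # for Gradio to persist it across this user's interactions.
    state["calls"] = state.get("calls", 0) + 1
    return f"scored {len(text)} chars (call #{state['calls']})", state

with gr.Blocks() as demo:
    session_state = gr.State(value={})  # one copy per browser session
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Result")
    btn = gr.Button("Run")
    btn.click(fn=detect_stub, inputs=[inp, session_state],
              outputs=[out, session_state])

demo.launch()
```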
@@ -463,7 +477,8 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
                     with gr.Column(scale=1):
                         select_green_tokens = gr.Checkbox(label="Select 'greenlist' from partition", value=args.select_green_tokens)

-        with gr.Accordion("Understanding the settings",open=False):
+
+        with gr.Accordion("What do the settings do?",open=False):
             gr.Markdown(
             """
             #### Generation Parameters:
@@ -515,6 +530,27 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
             """
             )

+        with gr.Accordion("What do the output metrics mean?",open=False):
+            gr.Markdown(
+            """
+            - `z-score threshold` : The cutoff for the hypothesis test
+            - `Tokens Counted (T)` : The number of tokens in the output that were counted by the detection algorithm.
+                The first token is omitted in the simple, single-token seeding scheme since there is no way to generate
+                a greenlist for it as it has no prefix token(s). Under the "Ignore Bigram Repeats" detection algorithm,
+                described in the bottom panel, this can be much less than the total number of tokens generated if there is a lot of repetition.
+            - `# Tokens in Greenlist` : The number of tokens that were observed to fall in their respective greenlist
+            - `Fraction of T in Greenlist` : The `# Tokens in Greenlist` / `T`. This is expected to be approximately `gamma` for human/unwatermarked text.
+            - `z-score` : The test statistic for the detection hypothesis test. If larger than the `z-score threshold`
+                we "reject the null hypothesis" that the text is human/unwatermarked, and conclude it is watermarked
+            - `p value` : The likelihood of observing the computed `z-score` under the null hypothesis. This is the likelihood of
+                observing the `Fraction of T in Greenlist` given that the text was generated without knowledge of the watermark procedure/greenlists.
+                If this is extremely _small_, we are confident that this many green tokens were not chosen by random chance.
+            - `prediction` : The outcome of the hypothesis test - whether the observed `z-score` was higher than the `z-score threshold`
+            - `confidence` : If we reject the null hypothesis, and the `prediction` is "Watermarked", then we report 1-`p value` to represent
+                the confidence of the detection based on the unlikeliness of this `z-score` observation.
+            """
+            )
+
         gr.HTML("""
                 <p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
                 Follow the github link at the top and host the demo on your own GPU hardware to test out larger models.
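The relocated metrics accordion describes each field the detector reports. A hedged sketch of how those fields relate numerically, assuming the green-token count has already been tallied (a standalone helper, not the repo's detector class; the 4.0 threshold is an assumed default):

```python
from math import erfc, sqrt

def detection_metrics(green_count: int, T: int, gamma: float, z_threshold: float = 4.0):
    # z-score: standard deviations above the gamma*T green tokens
    # expected under the no-watermark null hypothesis.
    z = (green_count - gamma * T) / sqrt(T * gamma * (1 - gamma))
    # p-value: upper-tail probability of a standard normal.
    p = 0.5 * erfc(z / sqrt(2))
    watermarked = z > z_threshold
    return {
        "Tokens Counted (T)": T,
        "# Tokens in Greenlist": green_count,
        "Fraction of T in Greenlist": green_count / T,
        "z-score": z,
        "p value": p,
        "prediction": "Watermarked" if watermarked else "Not watermarked",
        "confidence": 1 - p if watermarked else None,
    }

# 160 of 200 tokens green at gamma=0.5 gives z ~ 8.5, an easy detection:
print(detection_metrics(green_count=160, T=200, gamma=0.5))
```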
@@ -532,7 +568,8 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
         output_without_watermark.change(fn=detect_partial, inputs=[output_without_watermark,session_args], outputs=[without_watermark_detection_result,session_args])
         output_with_watermark.change(fn=detect_partial, inputs=[output_with_watermark,session_args], outputs=[with_watermark_detection_result,session_args])
         # Register main detection tab click
-        detect_btn.click(fn=detect_partial, inputs=[detection_input,session_args], outputs=[detection_result, session_args])
+        # detect_btn.click(fn=detect_partial, inputs=[detection_input,session_args], outputs=[detection_result, session_args])
+        detect_btn.click(fn=detect_partial, inputs=[detection_input,session_args], outputs=[detection_result, session_args], api_name="detection")

         # State management logic
         # update callbacks that change the state dict
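Adding `api_name="detection"` to the click handler registers a named API route on the Space, so the detector can be called programmatically rather than through the UI. A hedged sketch using `gradio_client` (the Space id is assumed for illustration, and the exact positional arguments depend on the Gradio version and which inputs are exposed):

```python
from gradio_client import Client

# Space id assumed for illustration; point this at wherever the demo is hosted.
client = Client("tomg-group-umd/lm-watermarking")

# Call the endpoint registered via api_name="detection" with text to score.
result = client.predict(
    "Paste any text here to check it for the watermark...",
    api_name="/detection",
)
print(result)
```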
@@ -624,10 +661,15 @@ def main(args):
         model, tokenizer, device = load_model(args)
     else:
         model, tokenizer, device = None, None, None
+        tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)
+        if args.use_gpu:
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+        else:
+            device = "cpu"

-    # Generate and detect, report to stdout
-    if not args.skip_model_load:
-        input_text = (
+
+    # terrapin example
+    input_text = (
         "The diamondback terrapin or simply terrapin (Malaclemys terrapin) is a "
         "species of turtle native to the brackish coastal tidal marshes of the "
         "Northeastern and southern United States, and in Bermuda.[6] It belongs "
@@ -648,9 +690,12 @@ def main(args):
         "or white. All have a unique pattern of wiggly, black markings or spots "
         "on their body and head. The diamondback terrapin has large webbed "
         "feet.[9] The species is"
-        )
+    )

-        args.default_prompt = input_text
+    args.default_prompt = input_text
+
+    # Generate and detect, report to stdout
+    if not args.skip_model_load:

         term_width = 80
         print("#"*term_width)