jwkirchenbauer committed on
Commit
29d8de2
·
1 Parent(s): cb2cffc

Gradio details panels, docstrings

Browse files
Files changed (2) hide show
  1. demo_watermark.py +95 -21
  2. requirements.txt +1 -0
demo_watermark.py CHANGED
@@ -33,6 +33,7 @@ from transformers import (AutoTokenizer,
33
  from watermark_processor import WatermarkLogitsProcessor, WatermarkDetector
34
 
35
  def str2bool(v):
 
36
  if isinstance(v, bool):
37
  return v
38
  if v.lower() in ('yes', 'true', 't', 'y', '1'):
@@ -43,6 +44,7 @@ def str2bool(v):
43
  raise argparse.ArgumentTypeError('Boolean value expected.')
44
 
45
  def parse_args():
 
46
 
47
  parser = argparse.ArgumentParser(description="A minimum working example of applying the watermark to any LLM that supports the huggingface 🤗 `generate` API")
48
 
@@ -164,6 +166,8 @@ def parse_args():
164
  return args
165
 
166
  def load_model(args):
 
 
167
  args.is_seq2seq_model = any([(model_type in args.model_name_or_path) for model_type in ["t5","T0"]])
168
  args.is_decoder_only_model = any([(model_type in args.model_name_or_path) for model_type in ["gpt","opt","bloom"]])
169
  if args.is_seq2seq_model:
@@ -185,7 +189,10 @@ def load_model(args):
185
  return model, tokenizer, device
186
 
187
  def generate(prompt, args, model=None, device=None, tokenizer=None):
188
-
 
 
 
189
  print(f"Generating with {args}")
190
 
191
  watermark_processor = WatermarkLogitsProcessor(vocab=list(tokenizer.get_vocab().values()),
@@ -251,25 +258,16 @@ def generate(prompt, args, model=None, device=None, tokenizer=None):
251
  # decoded_output_with_watermark)
252
 
253
  def format_names(s):
 
254
  s=s.replace("num_tokens_scored","Tokens Counted (T)")
255
  s=s.replace("num_green_tokens","# Tokens in Greenlist")
256
  s=s.replace("green_fraction","Fraction of T in Greenlist")
257
  s=s.replace("z_score","z-score")
258
  s=s.replace("p_value","p value")
259
  return s
260
- # def str_format_scores(score_dict, detection_threshold):
261
- # output_str = f"@ z-score threshold={detection_threshold}:\n\n"
262
- # for k,v in score_dict.items():
263
- # if k=='green_fraction':
264
- # output_str+=f"{format_names(k)}={v:.1%}"
265
- # elif k=='confidence':
266
- # output_str+=f"{format_names(k)}={v:.3%}"
267
- # elif isinstance(v, float):
268
- # output_str+=f"{format_names(k)}={v:.3g}"
269
- # else:
270
- # output_str += v
271
- # return output_str
272
  def list_format_scores(score_dict, detection_threshold):
 
273
  lst_2d = []
274
  lst_2d.append(["z-score threshold", f"{detection_threshold}"])
275
  for k,v in score_dict.items():
@@ -286,6 +284,8 @@ def list_format_scores(score_dict, detection_threshold):
286
  return lst_2d
287
 
288
  def detect(input_text, args, device=None, tokenizer=None):
 
 
289
  watermark_detector = WatermarkDetector(vocab=list(tokenizer.get_vocab().values()),
290
  gamma=args.gamma,
291
  seeding_scheme=args.seeding_scheme,
@@ -306,7 +306,7 @@ def detect(input_text, args, device=None, tokenizer=None):
306
  return output, args
307
 
308
  def run_gradio(args, model=None, device=None, tokenizer=None):
309
-
310
  generate_partial = partial(generate, model=model, device=device, tokenizer=tokenizer)
311
  detect_partial = partial(detect, device=device, tokenizer=tokenizer)
312
 
@@ -315,15 +315,38 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
315
  # Top section, greeting and instructions
316
  gr.Markdown("## 💧 [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226) 🔍")
317
  gr.Markdown("[jwkirchenbauer/lm-watermarking![](https://badgen.net/badge/icon/GitHub?icon=github&label)](https://github.com/jwkirchenbauer/lm-watermarking)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
 
319
  with gr.Accordion("A note on model capability",open=False):
320
  gr.Markdown(
321
  """
322
- The models that can be used in this demo are limited to those that are open source as well as fit on a single commodity GPU. In particular, there are few models above 10B parameters and way fewer trained using both Instruction finetuning or RLHF that are open source that we can use.
323
-
324
- Therefore, the model, in both it's un-watermarked (normal) and watermarked state, is not generally able to respond well to the kinds of prompts that a 100B+ Instruction and RLHF tuned model such as ChatGPT, Claude, or Bard is.
325
 
326
- We suggest you try prompts that give the model a few sentences and then allow it to 'continue' the prompt, as these weaker models are more capable in this simpler language modeling setting.
 
 
327
  """
328
  )
329
 
@@ -407,9 +430,59 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
407
  seed_separately = gr.Checkbox(label="Seed both generations separately", value=args.seed_separately)
408
  with gr.Column(scale=1):
409
  select_green_tokens = gr.Checkbox(label="Select 'greenlist' from partition", value=args.select_green_tokens)
410
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
411
  gr.HTML("""
412
- <p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
 
413
  <br/>
414
  <a href="https://huggingface.co/spaces/tomg-group-umd/lm-watermarking?duplicate=true">
415
  <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
@@ -506,7 +579,8 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
506
  demo.launch()
507
 
508
  def main(args):
509
-
 
510
  # Initial arg processing and log
511
  args.normalizers = (args.normalizers.split(",") if args.normalizers else [])
512
  print(args)
 
33
  from watermark_processor import WatermarkLogitsProcessor, WatermarkDetector
34
 
35
  def str2bool(v):
36
+ """Util function for user friendly boolean flag args"""
37
  if isinstance(v, bool):
38
  return v
39
  if v.lower() in ('yes', 'true', 't', 'y', '1'):
 
44
  raise argparse.ArgumentTypeError('Boolean value expected.')
45
 
46
  def parse_args():
47
+ """Command line argument specification"""
48
 
49
  parser = argparse.ArgumentParser(description="A minimum working example of applying the watermark to any LLM that supports the huggingface 🤗 `generate` API")
50
 
 
166
  return args
167
 
168
  def load_model(args):
169
+ """Load and return the model and tokenizer"""
170
+
171
  args.is_seq2seq_model = any([(model_type in args.model_name_or_path) for model_type in ["t5","T0"]])
172
  args.is_decoder_only_model = any([(model_type in args.model_name_or_path) for model_type in ["gpt","opt","bloom"]])
173
  if args.is_seq2seq_model:
 
189
  return model, tokenizer, device
190
 
191
  def generate(prompt, args, model=None, device=None, tokenizer=None):
192
+ """Instantiate the WatermarkLogitsProcessor according to the watermark parameters
193
+ and generate watermarked text by passing it to the generate method of the model
194
+ as a logits processor. """
195
+
196
  print(f"Generating with {args}")
197
 
198
  watermark_processor = WatermarkLogitsProcessor(vocab=list(tokenizer.get_vocab().values()),
 
258
  # decoded_output_with_watermark)
259
 
260
  def format_names(s):
261
+ """Format names for the gradio demo interface"""
262
  s=s.replace("num_tokens_scored","Tokens Counted (T)")
263
  s=s.replace("num_green_tokens","# Tokens in Greenlist")
264
  s=s.replace("green_fraction","Fraction of T in Greenlist")
265
  s=s.replace("z_score","z-score")
266
  s=s.replace("p_value","p value")
267
  return s
268
+
 
 
 
 
 
 
 
 
 
 
 
269
  def list_format_scores(score_dict, detection_threshold):
270
+ """Format the detection metrics into a gradio dataframe input format"""
271
  lst_2d = []
272
  lst_2d.append(["z-score threshold", f"{detection_threshold}"])
273
  for k,v in score_dict.items():
 
284
  return lst_2d
285
 
286
  def detect(input_text, args, device=None, tokenizer=None):
287
+ """Instantiate the WatermarkDetector object and call detect on
288
+ the input text returning the scores and outcome of the test"""
289
  watermark_detector = WatermarkDetector(vocab=list(tokenizer.get_vocab().values()),
290
  gamma=args.gamma,
291
  seeding_scheme=args.seeding_scheme,
 
306
  return output, args
307
 
308
  def run_gradio(args, model=None, device=None, tokenizer=None):
309
+ """Define and launch the gradio demo interface"""
310
  generate_partial = partial(generate, model=model, device=device, tokenizer=tokenizer)
311
  detect_partial = partial(detect, device=device, tokenizer=tokenizer)
312
 
 
315
  # Top section, greeting and instructions
316
  gr.Markdown("## 💧 [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226) 🔍")
317
  gr.Markdown("[jwkirchenbauer/lm-watermarking![](https://badgen.net/badge/icon/GitHub?icon=github&label)](https://github.com/jwkirchenbauer/lm-watermarking)")
318
+ gr.Markdown(f"Language model: {args.model_name_or_path}")
319
+ with gr.Accordion("Understanding the output metrics",open=False):
320
+ gr.Markdown(
321
+ """
322
+ - `z-score threshold` : The cutoff for the hypothesis test
323
+ - `Tokens Counted (T)` : The number of tokens in the output that were counted by the detection algorithm.
324
+ The first token is omitted in the simple, single token seeding scheme since there is no way to generate
325
+ a greenlist for it as it has no prefix token(s). Under the "Ignore Bigram Repeats" detection algorithm,
326
+ described in the bottom panel, this can be much less than the total number of tokens generated if there is a lot of repetition.
327
+ - `# Tokens in Greenlist` : The number of tokens that were observed to fall in their respective greenlist
328
+ - `Fraction of T in Greenlist` : The `# Tokens in Greenlist` / `T`. This is expected to be approximately `gamma` for human/unwatermarked text.
329
+ - `z-score` : The test statistic for the detection hypothesis test. If larger than the `z-score threshold`
330
+ we "reject the null hypothesis" that the text is human/unwatermarked, and conclude it is watermarked
331
+ - `p value` : The likelihood of observing the computed `z-score` under the null hypothesis. This is the likelihood of
332
+ observing the `Fraction of T in Greenlist` given that the text was generated without knowledge of the watermark procedure/greenlists.
333
+ If this is extremely _small_ we are confident that this many green tokens were not chosen by random chance.
334
+ - `prediction` : The outcome of the hypothesis test - whether the observed `z-score` was higher than the `z-score threshold`
335
+ - `confidence` : If we reject the null hypothesis, and the `prediction` is "Watermarked", then we report 1-`p value` to represent
336
+ the confidence of the detection based on the unlikeliness of this `z-score` observation.
337
+ """
338
+ )
339
 
340
  with gr.Accordion("A note on model capability",open=False):
341
  gr.Markdown(
342
  """
343
+ The models that can be used in this demo are limited to those that are both open source and that fit on a single commodity GPU.
344
+ In particular, there aren't many models above a few billion parameters and almost none trained using Instruction-finetuning and/or RLHF.
345
+ Therefore, in both its un-watermarked (normal) and watermarked states, the model is not generally able to respond well to the kinds of prompts that a 100B+ Instruction and RLHF tuned model such as ChatGPT, Claude, or Bard is.
346
 
347
+ We suggest you try prompts that give the model a few sentences and then allow it to 'continue' the prompt, as these weaker models are more capable in this simpler language modeling setting.
348
+ Some examples include the opening paragraph of a wikipedia article, or the first few sentences of a story.
349
+ Longer prompts and stopping mid-sentence often help encourage more fluent, longer generations.
350
  """
351
  )
352
 
 
430
  seed_separately = gr.Checkbox(label="Seed both generations separately", value=args.seed_separately)
431
  with gr.Column(scale=1):
432
  select_green_tokens = gr.Checkbox(label="Select 'greenlist' from partition", value=args.select_green_tokens)
433
+
434
+ with gr.Accordion("Understanding the settings",open=False):
435
+ gr.Markdown(
436
+ """
437
+ #### Generation Parameters:
438
+
439
+ - Decoding Method : We can generate tokens from the model using either multinomial sampling or we can generate using greedy decoding.
440
+ - Sampling Temperature : If using multinomial sampling we can set the temperature of the sampling distribution.
441
+ 0.0 is equivalent to greedy decoding, and 1.0 is the maximum amount of variability/entropy in the next token distribution.
442
+ 0.7 strikes a nice balance between faithfulness to the model's estimate of top candidates while adding variety. Does not apply for greedy decoding.
443
+ - Generation Seed : The integer to pass to the torch random number generator before running generation. Makes the multinomial sampling strategy
444
+ outputs reproducible. Does not apply for greedy decoding.
445
+ - Number of Beams : When using greedy decoding, we can also set the number of beams to > 1 to enable beam search.
446
+ This is not implemented/excluded from paper for multinomial sampling but may be added in future.
447
+ - Max Generated Tokens : The `max_new_tokens` parameter passed to the generation method to stop the output at a certain number of new tokens.
448
+ Note that the model is free to generate fewer tokens depending on the prompt.
449
+ Implicitly this sets the maximum number of prompt tokens possible as the model's maximum input length minus `max_new_tokens`,
450
+ and inputs will be truncated accordingly.
451
+
452
+ #### Watermark Parameters:
453
+
454
+ - gamma : The fraction of the vocabulary to be partitioned into the greenlist at each generation step.
455
+ Smaller gamma values create a stronger watermark by enabling the watermarked model to achieve
456
+ a greater differentiation from human/unwatermarked text because it is preferentially sampling
457
+ from a smaller green set making those tokens less likely to occur by chance.
458
+ - delta : The amount of positive bias to add to the logits of every token in the greenlist
459
+ at each generation step before sampling/choosing the next token. Higher delta values
460
+ mean that the greenlist tokens are more heavily preferred by the watermarked model
461
+ and as the bias becomes very large the watermark transitions from "soft" to "hard".
462
+ For a hard watermark, nearly all tokens are green, but this can have a detrimental effect on
463
+ generation quality, especially when there is not a lot of flexibility in the distribution.
464
+ - z-score threshold : the z-score cutoff for the hypothesis test. Higher thresholds (such as 4.0) make
465
+ _false positives_ (predicting that human/unwatermarked text is watermarked) very unlikely
466
+ as a genuine human text with a significant number of tokens will almost never achieve
467
+ that high of a z-score. Lower thresholds will capture more _true positives_ as some watermarked
468
+ texts will contain fewer green tokens and achieve a lower z-score, but still pass the lower bar and
469
+ be flagged as "watermarked". However, a lower threshold will increase the chance that human text
470
+ that contains a slightly higher than average number of green tokens is erroneously flagged.
471
+ 4.0-5.0 offers extremely low false positive rates while still accurately catching most watermarked text.
472
+ - Ignore Bigram Repeats : This alternate detection algorithm only considers the unique bigrams in the text during detection,
473
+ computing the greenlists based on the first in each pair and checking whether the second falls within the list.
474
+ This means that `T` is now the unique number of bigrams in the text, which becomes less than the total
475
+ number of tokens generated if the text contains a lot of repetition. See the paper for a more detailed discussion.
476
+ - Normalizations : we implement a few basic normalizations to defend against various adversarial perturbations of the
477
+ text analyzed during detection. Currently we support converting all characters to unicode,
478
+ replacing homoglyphs with a canonical form, and standardizing the capitalization.
479
+ See the paper for a detailed discussion of input normalization.
480
+ """
481
+ )
482
+
483
  gr.HTML("""
484
+ <p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
485
+ Follow the github link at the top and host the demo on your own GPU hardware to test out larger models.
486
  <br/>
487
  <a href="https://huggingface.co/spaces/tomg-group-umd/lm-watermarking?duplicate=true">
488
  <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
 
579
  demo.launch()
580
 
581
  def main(args):
582
+ """Run a command line version of the generation and detection operations
583
+ and optionally launch and serve the gradio demo"""
584
  # Initial arg processing and log
585
  args.normalizers = (args.normalizers.split(",") if args.normalizers else [])
586
  print(args)
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 
1
  nltk
2
  scipy
3
  torch
 
1
+ gradio
2
  nltk
3
  scipy
4
  torch