Spaces:
Sleeping
Sleeping
jwkirchenbauer
commited on
Commit
·
29d8de2
1
Parent(s):
cb2cffc
Gradio details panels, docstrings
Browse files- demo_watermark.py +95 -21
- requirements.txt +1 -0
demo_watermark.py
CHANGED
@@ -33,6 +33,7 @@ from transformers import (AutoTokenizer,
|
|
33 |
from watermark_processor import WatermarkLogitsProcessor, WatermarkDetector
|
34 |
|
35 |
def str2bool(v):
|
|
|
36 |
if isinstance(v, bool):
|
37 |
return v
|
38 |
if v.lower() in ('yes', 'true', 't', 'y', '1'):
|
@@ -43,6 +44,7 @@ def str2bool(v):
|
|
43 |
raise argparse.ArgumentTypeError('Boolean value expected.')
|
44 |
|
45 |
def parse_args():
|
|
|
46 |
|
47 |
parser = argparse.ArgumentParser(description="A minimum working example of applying the watermark to any LLM that supports the huggingface 🤗 `generate` API")
|
48 |
|
@@ -164,6 +166,8 @@ def parse_args():
|
|
164 |
return args
|
165 |
|
166 |
def load_model(args):
|
|
|
|
|
167 |
args.is_seq2seq_model = any([(model_type in args.model_name_or_path) for model_type in ["t5","T0"]])
|
168 |
args.is_decoder_only_model = any([(model_type in args.model_name_or_path) for model_type in ["gpt","opt","bloom"]])
|
169 |
if args.is_seq2seq_model:
|
@@ -185,7 +189,10 @@ def load_model(args):
|
|
185 |
return model, tokenizer, device
|
186 |
|
187 |
def generate(prompt, args, model=None, device=None, tokenizer=None):
|
188 |
-
|
|
|
|
|
|
|
189 |
print(f"Generating with {args}")
|
190 |
|
191 |
watermark_processor = WatermarkLogitsProcessor(vocab=list(tokenizer.get_vocab().values()),
|
@@ -251,25 +258,16 @@ def generate(prompt, args, model=None, device=None, tokenizer=None):
|
|
251 |
# decoded_output_with_watermark)
|
252 |
|
253 |
def format_names(s):
|
|
|
254 |
s=s.replace("num_tokens_scored","Tokens Counted (T)")
|
255 |
s=s.replace("num_green_tokens","# Tokens in Greenlist")
|
256 |
s=s.replace("green_fraction","Fraction of T in Greenlist")
|
257 |
s=s.replace("z_score","z-score")
|
258 |
s=s.replace("p_value","p value")
|
259 |
return s
|
260 |
-
|
261 |
-
# output_str = f"@ z-score threshold={detection_threshold}:\n\n"
|
262 |
-
# for k,v in score_dict.items():
|
263 |
-
# if k=='green_fraction':
|
264 |
-
# output_str+=f"{format_names(k)}={v:.1%}"
|
265 |
-
# elif k=='confidence':
|
266 |
-
# output_str+=f"{format_names(k)}={v:.3%}"
|
267 |
-
# elif isinstance(v, float):
|
268 |
-
# output_str+=f"{format_names(k)}={v:.3g}"
|
269 |
-
# else:
|
270 |
-
# output_str += v
|
271 |
-
# return output_str
|
272 |
def list_format_scores(score_dict, detection_threshold):
|
|
|
273 |
lst_2d = []
|
274 |
lst_2d.append(["z-score threshold", f"{detection_threshold}"])
|
275 |
for k,v in score_dict.items():
|
@@ -286,6 +284,8 @@ def list_format_scores(score_dict, detection_threshold):
|
|
286 |
return lst_2d
|
287 |
|
288 |
def detect(input_text, args, device=None, tokenizer=None):
|
|
|
|
|
289 |
watermark_detector = WatermarkDetector(vocab=list(tokenizer.get_vocab().values()),
|
290 |
gamma=args.gamma,
|
291 |
seeding_scheme=args.seeding_scheme,
|
@@ -306,7 +306,7 @@ def detect(input_text, args, device=None, tokenizer=None):
|
|
306 |
return output, args
|
307 |
|
308 |
def run_gradio(args, model=None, device=None, tokenizer=None):
|
309 |
-
|
310 |
generate_partial = partial(generate, model=model, device=device, tokenizer=tokenizer)
|
311 |
detect_partial = partial(detect, device=device, tokenizer=tokenizer)
|
312 |
|
@@ -315,15 +315,38 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
|
|
315 |
# Top section, greeting and instructions
|
316 |
gr.Markdown("## 💧 [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226) 🔍")
|
317 |
gr.Markdown("[jwkirchenbauer/lm-watermarking![](https://badgen.net/badge/icon/GitHub?icon=github&label)](https://github.com/jwkirchenbauer/lm-watermarking)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
318 |
|
319 |
with gr.Accordion("A note on model capability",open=False):
|
320 |
gr.Markdown(
|
321 |
"""
|
322 |
-
The models that can be used in this demo are limited to those that are open source
|
323 |
-
|
324 |
-
Therefore,
|
325 |
|
326 |
-
We suggest you try prompts that give the model a few sentences and then allow it to 'continue' the prompt, as these weaker models are more capable in this simpler language modeling setting.
|
|
|
|
|
327 |
"""
|
328 |
)
|
329 |
|
@@ -407,9 +430,59 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
|
|
407 |
seed_separately = gr.Checkbox(label="Seed both generations separately", value=args.seed_separately)
|
408 |
with gr.Column(scale=1):
|
409 |
select_green_tokens = gr.Checkbox(label="Select 'greenlist' from partition", value=args.select_green_tokens)
|
410 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
411 |
gr.HTML("""
|
412 |
-
<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
|
|
|
413 |
<br/>
|
414 |
<a href="https://huggingface.co/spaces/tomg-group-umd/lm-watermarking?duplicate=true">
|
415 |
<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
|
@@ -506,7 +579,8 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
|
|
506 |
demo.launch()
|
507 |
|
508 |
def main(args):
|
509 |
-
|
|
|
510 |
# Initial arg processing and log
|
511 |
args.normalizers = (args.normalizers.split(",") if args.normalizers else [])
|
512 |
print(args)
|
|
|
33 |
from watermark_processor import WatermarkLogitsProcessor, WatermarkDetector
|
34 |
|
35 |
def str2bool(v):
|
36 |
+
"""Util function for user friendly boolean flag args"""
|
37 |
if isinstance(v, bool):
|
38 |
return v
|
39 |
if v.lower() in ('yes', 'true', 't', 'y', '1'):
|
|
|
44 |
raise argparse.ArgumentTypeError('Boolean value expected.')
|
45 |
|
46 |
def parse_args():
|
47 |
+
"""Command line argument specification"""
|
48 |
|
49 |
parser = argparse.ArgumentParser(description="A minimum working example of applying the watermark to any LLM that supports the huggingface 🤗 `generate` API")
|
50 |
|
|
|
166 |
return args
|
167 |
|
168 |
def load_model(args):
|
169 |
+
"""Load and return the model and tokenizer"""
|
170 |
+
|
171 |
args.is_seq2seq_model = any([(model_type in args.model_name_or_path) for model_type in ["t5","T0"]])
|
172 |
args.is_decoder_only_model = any([(model_type in args.model_name_or_path) for model_type in ["gpt","opt","bloom"]])
|
173 |
if args.is_seq2seq_model:
|
|
|
189 |
return model, tokenizer, device
|
190 |
|
191 |
def generate(prompt, args, model=None, device=None, tokenizer=None):
|
192 |
+
"""Instantiate the WatermarkLogitsProcessor according to the watermark parameters
|
193 |
+
and generate watermarked text by passing it to the generate method of the model
|
194 |
+
as a logits processor. """
|
195 |
+
|
196 |
print(f"Generating with {args}")
|
197 |
|
198 |
watermark_processor = WatermarkLogitsProcessor(vocab=list(tokenizer.get_vocab().values()),
|
|
|
258 |
# decoded_output_with_watermark)
|
259 |
|
260 |
def format_names(s):
|
261 |
+
"""Format names for the gradio demo interface"""
|
262 |
s=s.replace("num_tokens_scored","Tokens Counted (T)")
|
263 |
s=s.replace("num_green_tokens","# Tokens in Greenlist")
|
264 |
s=s.replace("green_fraction","Fraction of T in Greenlist")
|
265 |
s=s.replace("z_score","z-score")
|
266 |
s=s.replace("p_value","p value")
|
267 |
return s
|
268 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
269 |
def list_format_scores(score_dict, detection_threshold):
|
270 |
+
"""Format the detection metrics into a gradio dataframe input format"""
|
271 |
lst_2d = []
|
272 |
lst_2d.append(["z-score threshold", f"{detection_threshold}"])
|
273 |
for k,v in score_dict.items():
|
|
|
284 |
return lst_2d
|
285 |
|
286 |
def detect(input_text, args, device=None, tokenizer=None):
|
287 |
+
"""Instantiate the WatermarkDetector object and call detect on
|
288 |
+
the input text returning the scores and outcome of the test"""
|
289 |
watermark_detector = WatermarkDetector(vocab=list(tokenizer.get_vocab().values()),
|
290 |
gamma=args.gamma,
|
291 |
seeding_scheme=args.seeding_scheme,
|
|
|
306 |
return output, args
|
307 |
|
308 |
def run_gradio(args, model=None, device=None, tokenizer=None):
|
309 |
+
"""Define and launch the gradio demo interface"""
|
310 |
generate_partial = partial(generate, model=model, device=device, tokenizer=tokenizer)
|
311 |
detect_partial = partial(detect, device=device, tokenizer=tokenizer)
|
312 |
|
|
|
315 |
# Top section, greeting and instructions
|
316 |
gr.Markdown("## 💧 [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226) 🔍")
|
317 |
gr.Markdown("[jwkirchenbauer/lm-watermarking![](https://badgen.net/badge/icon/GitHub?icon=github&label)](https://github.com/jwkirchenbauer/lm-watermarking)")
|
318 |
+
gr.Markdown(f"Language model: {args.model_name_or_path}")
|
319 |
+
with gr.Accordion("Understanding the output metrics",open=False):
|
320 |
+
gr.Markdown(
|
321 |
+
"""
|
322 |
+
- `z-score threshold` : The cutoff for the hypothesis test
|
323 |
+
- `Tokens Counted (T)` : The number of tokens in the output that were counted by the detection algorithm.
|
324 |
+
The first token is omitted in the simple, single token seeding scheme since there is no way to generate
|
325 |
+
a greenlist for it as it has no prefix token(s). Under the "Ignore Bigram Repeats" detection algorithm,
|
326 |
+
described in the bottom panel, this can be much less than the total number of tokens generated if there is a lot of repetition.
|
327 |
+
- `# Tokens in Greenlist` : The number of tokens that were observed to fall in their respective greenlist
|
328 |
+
- `Fraction of T in Greenlist` : The `# Tokens in Greenlist` / `T`. This is expected to be approximately `gamma` for human/unwatermarked text.
|
329 |
+
- `z-score` : The test statistic for the detection hypothesis test. If larger than the `z-score threshold`
|
330 |
+
we "reject the null hypothesis" that the text is human/unwatermarked, and conclude it is watermarked
|
331 |
+
- `p value` : The likelihood of observing the computed `z-score` under the null hypothesis. This is the likelihood of
|
332 |
+
observing the `Fraction of T in Greenlist` given that the text was generated without knowledge of the watermark procedure/greenlists.
|
333 |
+
If this is extremely _small_ we are confident that this many green tokens was not chosen by random chance.
|
334 |
+
- `prediction` : The outcome of the hypothesis test - whether the observed `z-score` was higher than the `z-score threshold`
|
335 |
+
- `confidence` : If we reject the null hypothesis, and the `prediction` is "Watermarked", then we report 1-`p value` to represent
|
336 |
+
the confidence of the detection based on the unlikeliness of this `z-score` observation.
|
337 |
+
"""
|
338 |
+
)
|
339 |
|
340 |
with gr.Accordion("A note on model capability",open=False):
|
341 |
gr.Markdown(
|
342 |
"""
|
343 |
+
The models that can be used in this demo are limited to those that are both open source and that fit on a single commodity GPU.
|
344 |
+
In particular, there aren't many models above a few billion parameters and almost none trained using Instruction-finetuning and/or RLHF.
|
345 |
+
Therefore, in both its un-watermarked (normal) and watermarked states, the model is not generally able to respond well to the kinds of prompts that a 100B+ Instruction and RLHF tuned model such as ChatGPT, Claude, or Bard is.
|
346 |
|
347 |
+
We suggest you try prompts that give the model a few sentences and then allow it to 'continue' the prompt, as these weaker models are more capable in this simpler language modeling setting.
|
348 |
+
Some examples include the opening paragraph of a wikipedia article, or the first few sentences of a story.
|
349 |
+
Longer prompts and stopping mid-sentence often help encourage more fluent, longer generations.
|
350 |
"""
|
351 |
)
|
352 |
|
|
|
430 |
seed_separately = gr.Checkbox(label="Seed both generations separately", value=args.seed_separately)
|
431 |
with gr.Column(scale=1):
|
432 |
select_green_tokens = gr.Checkbox(label="Select 'greenlist' from partition", value=args.select_green_tokens)
|
433 |
+
|
434 |
+
with gr.Accordion("Understanding the settings",open=False):
|
435 |
+
gr.Markdown(
|
436 |
+
"""
|
437 |
+
#### Generation Parameters:
|
438 |
+
|
439 |
+
- Decoding Method : We can generate tokens from the model using either multinomial sampling or we can generate using greedy decoding.
|
440 |
+
- Sampling Temperature : If using multinomial sampling we can set the temperature of the sampling distribution.
|
441 |
+
0.0 is equivalent to greedy decoding, and 1.0 is the maximum amount of variability/entropy in the next token distribution.
|
442 |
+
0.7 strikes a nice balance between faithfulness to the model's estimate of top candidates while adding variety. Does not apply for greedy decoding.
|
443 |
+
- Generation Seed : The integer to pass to the torch random number generator before running generation. Makes the multinomial sampling strategy
|
444 |
+
outputs reproducible. Does not apply for greedy decoding.
|
445 |
+
- Number of Beams : When using greedy decoding, we can also set the number of beams to > 1 to enable beam search.
|
446 |
+
This is not implemented/excluded from paper for multinomial sampling but may be added in future.
|
447 |
+
- Max Generated Tokens : The `max_new_tokens` parameter passed to the generation method to stop the output at a certain number of new tokens.
|
448 |
+
Note that the model is free to generate fewer tokens depending on the prompt.
|
449 |
+
Implicitly this sets the maximum number of prompt tokens possible as the model's maximum input length minus `max_new_tokens`,
|
450 |
+
and inputs will be truncated accordingly.
|
451 |
+
|
452 |
+
#### Watermark Parameters:
|
453 |
+
|
454 |
+
- gamma : The fraction of the vocabulary to be partitioned into the greenlist at each generation step.
|
455 |
+
Smaller gamma values create a stronger watermark by enabling the watermarked model to achieve
|
456 |
+
a greater differentiation from human/unwatermarked text because it is preferentially sampling
|
457 |
+
from a smaller green set making those tokens less likely to occur by chance.
|
458 |
+
- delta : The amount of positive bias to add to the logits of every token in the greenlist
|
459 |
+
at each generation step before sampling/choosing the next token. Higher delta values
|
460 |
+
mean that the greenlist tokens are more heavily preferred by the watermarked model
|
461 |
+
and as the bias becomes very large the watermark transitions from "soft" to "hard".
|
462 |
+
For a hard watermark, nearly all tokens are green, but this can have a detrimental effect on
|
463 |
+
generation quality, especially when there is not a lot of flexibility in the distribution.
|
464 |
+
- z-score threshold : the z-score cutoff for the hypothesis test. Higher thresholds (such as 4.0) make
|
465 |
+
_false positives_ (predicting that human/unwatermarked text is watermarked) very unlikely
|
466 |
+
as a genuine human text with a significant number of tokens will almost never achieve
|
467 |
+
that high of a z-score. Lower thresholds will capture more _true positives_ as some watermarked
|
468 |
+
texts will contain fewer green tokens and achieve a lower z-score, but still pass the lower bar and
|
469 |
+
be flagged as "watermarked". However, a lower threshold will increase the chance that human text
|
470 |
+
that contains a slightly higher than average number of green tokens is erroneously flagged.
|
471 |
+
4.0-5.0 offers extremely low false positive rates while still accurately catching most watermarked text.
|
472 |
+
- Ignore Bigram Repeats : This alternate detection algorithm only considers the unique bigrams in the text during detection,
|
473 |
+
computing the greenlists based on the first in each pair and checking whether the second falls within the list.
|
474 |
+
This means that `T` is now the unique number of bigrams in the text, which becomes less than the total
|
475 |
+
number of tokens generated if the text contains a lot of repetition. See the paper for a more detailed discussion.
|
476 |
+
- Normalizations : we implement a few basic normalizations to defend against various adversarial perturbations of the
|
477 |
+
text analyzed during detection. Currently we support converting all characters to unicode,
|
478 |
+
replacing homoglyphs with a canonical form, and standardizing the capitalization.
|
479 |
+
See the paper for a detailed discussion of input normalization.
|
480 |
+
"""
|
481 |
+
)
|
482 |
+
|
483 |
gr.HTML("""
|
484 |
+
<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
|
485 |
+
Follow the github link at the top and host the demo on your own GPU hardware to test out larger models.
|
486 |
<br/>
|
487 |
<a href="https://huggingface.co/spaces/tomg-group-umd/lm-watermarking?duplicate=true">
|
488 |
<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
|
|
|
579 |
demo.launch()
|
580 |
|
581 |
def main(args):
|
582 |
+
"""Run a command line version of the generation and detection operations
|
583 |
+
and optionally launch and serve the gradio demo"""
|
584 |
# Initial arg processing and log
|
585 |
args.normalizers = (args.normalizers.split(",") if args.normalizers else [])
|
586 |
print(args)
|
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
nltk
|
2 |
scipy
|
3 |
torch
|
|
|
1 |
+
gradio
|
2 |
nltk
|
3 |
scipy
|
4 |
torch
|