RyanMullins commited on
Commit
46f6023
β€’
1 Parent(s): 80c639a

Fixing class names

Browse files
Files changed (1) hide show
  1. app.py +54 -3
app.py CHANGED
@@ -1,4 +1,5 @@
1
  from collections.abc import Sequence
 
2
  import random
3
  from typing import Optional
4
 
@@ -85,10 +86,9 @@ detector_module = transformers.generation.BayesianDetectorModel.from_pretrained(
85
  )
86
  detector_module.to(_TORCH_DEVICE)
87
 
88
- detector = transformers.generation.watermarking.BayesianDetectorModel(
89
  detector_module=detector_module,
90
  logits_processor=logits_processor,
91
- tokenizer=tokenizer,
92
  )
93
 
94
 
@@ -114,7 +114,7 @@ def generate_outputs(
114
 
115
  with gr.Blocks() as demo:
116
  gr.Markdown(
117
- '''
118
  # Using SynthID Text in your Generative AI projects
119
 
120
  [SynthID][synthid] is a Google DeepMind technology that watermarks and
@@ -143,12 +143,63 @@ with gr.Blocks() as demo:
143
  detectors, and the [SynthID Text documentation][raitk-synthid] for more on
144
  how this technology works.
145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  [raitk-synthid]: /responsible/docs/safeguards/synthid
147
  [synthid]: https://deepmind.google/technologies/synthid/
148
  [synthid-hf-config]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/configuration_utils.py
149
  [synthid-hf-detector]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/watermarking.py
150
  [synthid-hf-detector-e2e]: https://github.com/huggingface/transformers/blob/v4.46.0/examples/research_projects/synthid_text/detector_bayesian.py
151
  [synthid-hf-logits-processor]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/logits_process.py
 
152
  '''
153
  )
154
  prompt_inputs = [
 
1
  from collections.abc import Sequence
2
+ import json
3
  import random
4
  from typing import Optional
5
 
 
86
  )
87
  detector_module.to(_TORCH_DEVICE)
88
 
89
+ detector = transformers.generation.watermarking.SynthIDTextWatermarkDetector(
90
  detector_module=detector_module,
91
  logits_processor=logits_processor,
 
92
  )
93
 
94
 
 
114
 
115
  with gr.Blocks() as demo:
116
  gr.Markdown(
117
+ f'''
118
  # Using SynthID Text in your Generative AI projects
119
 
120
  [SynthID][synthid] is a Google DeepMind technology that watermarks and
 
143
  detectors, and the [SynthID Text documentation][raitk-synthid] for more on
144
  how this technology works.
145
 
146
+ ## Getting started
147
+
148
+ Practically speaking, SynthID Text is a logits processor, applied to your
149
+ model's generation pipeline after [Top-K and Top-P][cloud-parameter-values],
150
+ that augments the model's logits using a pseudorandom _g_-function to encode
151
+ watermarking information in a way that balances generation quality with
152
+ watermark detectability. See the [paper][synthid-nature] for a complete
153
+ technical description of the algorithm and analyses of how different
154
+ configuration values affect performance.
155
+
156
+ Watermarks are [configured][synthid-hf-config] to parameterize the
157
+ _g_-function and how it is applied during generation. We use the following
158
+ configuration for all demos. It should not be used for any production
159
+ purposes.
160
+
161
+ ```json
162
+ {json.dumps(_WATERMARK_CONFIG_DICT)}
163
+ ```
164
+
165
+ Watermarks are applied by initializing a `SynthIDTextWatermarkingConfig`
166
+ and passing that as the `watermarking_config=` parameter in your call to
167
+ `.generate()`, as shown in the snippet below.
168
+
169
+ ```python
170
+ from transformers import AutoModelForCausalLM, AutoTokenizer
171
+ from transformers.generation import SynthIDTextWatermarkingConfig
172
+
173
+ # Standard model and tokenizer initialization
174
+ tokenizer = AutoTokenizer.from_pretrained('repo/id')
175
+ model = AutoModelForCausalLM.from_pretrained('repo/id')
176
+
177
+ # SynthID Text configuration
178
+ watermarking_config = SynthIDTextWatermarkingConfig(...)
179
+
180
+ # Generation with watermarking
181
+ tokenized_prompts = tokenizer(["your prompts here"])
182
+ output_sequences = model.generate(
183
+ **tokenized_prompts,
184
+ watermarking_config=watermarking_config,
185
+ do_sample=True,
186
+ )
187
+ watermarked_text = tokenizer.batch_decode(output_sequences)
188
+ ```
189
+
190
+ Enter up to three prompts then click the generate button. After you click,
191
+ [Gemma 2B][gemma] will generate watermarked and non-watermarked responses
192
+ for each non-empty prompt.
193
+
194
+ [cloud-parameter-values]: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/adjust-parameter-values
195
+ [gemma]: https://huggingface.co/google/gemma-2b
196
  [raitk-synthid]: /responsible/docs/safeguards/synthid
197
  [synthid]: https://deepmind.google/technologies/synthid/
198
  [synthid-hf-config]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/configuration_utils.py
199
  [synthid-hf-detector]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/watermarking.py
200
  [synthid-hf-detector-e2e]: https://github.com/huggingface/transformers/blob/v4.46.0/examples/research_projects/synthid_text/detector_bayesian.py
201
  [synthid-hf-logits-processor]: https://github.com/huggingface/transformers/blob/v4.46.0/src/transformers/generation/logits_process.py
202
+ [synthid-nature]: https://www.nature.com/articles/s41586-024-08025-4
203
  '''
204
  )
205
  prompt_inputs = [