jwkirchenbauer committed on
Commit
fa826da
1 Parent(s): f98590c

refactor markov_1 to simple_1

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. app.py +1 -1
  3. demo_watermark.py +1 -1
  4. watermark_processor.py +4 -4
.gitignore ADDED
@@ -0,0 +1 @@
 
1
+ __pycache__
app.py CHANGED
@@ -31,7 +31,7 @@ arg_dict = {
31
  'n_beams': 1,
32
  'sampling_temp': 0.7,
33
  'use_gpu': True,
34
- 'seeding_scheme': 'markov_1',
35
  'gamma': 0.25,
36
  'delta': 2.0,
37
  'normalizers': '',
31
  'n_beams': 1,
32
  'sampling_temp': 0.7,
33
  'use_gpu': True,
34
+ 'seeding_scheme': 'simple_1',
35
  'gamma': 0.25,
36
  'delta': 2.0,
37
  'normalizers': '',
demo_watermark.py CHANGED
@@ -109,7 +109,7 @@ def parse_args():
109
  parser.add_argument(
110
  "--seeding_scheme",
111
  type=str,
112
- default="markov_1",
113
  help="Seeding scheme to use to generate the greenlists at each generation and verification step.",
114
  )
115
  parser.add_argument(
109
  parser.add_argument(
110
  "--seeding_scheme",
111
  type=str,
112
+ default="simple_1",
113
  help="Seeding scheme to use to generate the greenlists at each generation and verification step.",
114
  )
115
  parser.add_argument(
watermark_processor.py CHANGED
@@ -35,7 +35,7 @@ class WatermarkBase:
35
  vocab: list[int] = None,
36
  gamma: float = 0.5,
37
  delta: float = 2.0,
38
- seeding_scheme: str = "markov_1", # mostly unused/always default
39
  hash_key: int = 15485863, # just a large prime number to create a rng seed with sufficient bit width
40
  select_green_tokens: bool = True,
41
  ):
@@ -56,7 +56,7 @@ class WatermarkBase:
56
  if seeding_scheme is None:
57
  seeding_scheme = self.seeding_scheme
58
 
59
- if seeding_scheme == "markov_1":
60
  assert input_ids.shape[-1] >= 1, f"seeding_scheme={seeding_scheme} requires at least a 1 token prefix sequence to seed rng"
61
  prev_token = input_ids[-1].item()
62
  self.rng.manual_seed(self.hash_key * prev_token)
@@ -138,7 +138,7 @@ class WatermarkDetector(WatermarkBase):
138
  self.z_threshold = z_threshold
139
  self.rng = torch.Generator(device=self.device)
140
 
141
- if self.seeding_scheme == "markov_1":
142
  self.min_prefix_len = 1
143
  else:
144
  raise NotImplementedError(f"Unexpected seeding_scheme: {self.seeding_scheme}")
@@ -149,7 +149,7 @@ class WatermarkDetector(WatermarkBase):
149
 
150
  self.ignore_repeated_bigrams = ignore_repeated_bigrams
151
  if self.ignore_repeated_bigrams:
152
- assert self.seeding_scheme == "markov_1", "No repeated bigram credit variant assumes the single token seeding scheme."
153
 
154
 
155
  def _compute_z_score(self, observed_count, T):
35
  vocab: list[int] = None,
36
  gamma: float = 0.5,
37
  delta: float = 2.0,
38
+ seeding_scheme: str = "simple_1", # mostly unused/always default
39
  hash_key: int = 15485863, # just a large prime number to create a rng seed with sufficient bit width
40
  select_green_tokens: bool = True,
41
  ):
56
  if seeding_scheme is None:
57
  seeding_scheme = self.seeding_scheme
58
 
59
+ if seeding_scheme == "simple_1":
60
  assert input_ids.shape[-1] >= 1, f"seeding_scheme={seeding_scheme} requires at least a 1 token prefix sequence to seed rng"
61
  prev_token = input_ids[-1].item()
62
  self.rng.manual_seed(self.hash_key * prev_token)
138
  self.z_threshold = z_threshold
139
  self.rng = torch.Generator(device=self.device)
140
 
141
+ if self.seeding_scheme == "simple_1":
142
  self.min_prefix_len = 1
143
  else:
144
  raise NotImplementedError(f"Unexpected seeding_scheme: {self.seeding_scheme}")
149
 
150
  self.ignore_repeated_bigrams = ignore_repeated_bigrams
151
  if self.ignore_repeated_bigrams:
152
+ assert self.seeding_scheme == "simple_1", "No repeated bigram credit variant assumes the single token seeding scheme."
153
 
154
 
155
  def _compute_z_score(self, observed_count, T):