Spaces:
Sleeping
Sleeping
jwkirchenbauer
committed on
Commit
•
fa826da
1
Parent(s):
f98590c
refactor markov_1 to simple_1
Browse files
- .gitignore +1 -0
- app.py +1 -1
- demo_watermark.py +1 -1
- watermark_processor.py +4 -4
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
__pycache__
|
app.py
CHANGED
@@ -31,7 +31,7 @@ arg_dict = {
|
|
31 |
'n_beams': 1,
|
32 |
'sampling_temp': 0.7,
|
33 |
'use_gpu': True,
|
34 |
-
'seeding_scheme': 'markov_1',
|
35 |
'gamma': 0.25,
|
36 |
'delta': 2.0,
|
37 |
'normalizers': '',
|
|
|
31 |
'n_beams': 1,
|
32 |
'sampling_temp': 0.7,
|
33 |
'use_gpu': True,
|
34 |
+
'seeding_scheme': 'simple_1',
|
35 |
'gamma': 0.25,
|
36 |
'delta': 2.0,
|
37 |
'normalizers': '',
|
demo_watermark.py
CHANGED
@@ -109,7 +109,7 @@ def parse_args():
|
|
109 |
parser.add_argument(
|
110 |
"--seeding_scheme",
|
111 |
type=str,
|
112 |
-
default="markov_1",
|
113 |
help="Seeding scheme to use to generate the greenlists at each generation and verification step.",
|
114 |
)
|
115 |
parser.add_argument(
|
|
|
109 |
parser.add_argument(
|
110 |
"--seeding_scheme",
|
111 |
type=str,
|
112 |
+
default="simple_1",
|
113 |
help="Seeding scheme to use to generate the greenlists at each generation and verification step.",
|
114 |
)
|
115 |
parser.add_argument(
|
watermark_processor.py
CHANGED
@@ -35,7 +35,7 @@ class WatermarkBase:
|
|
35 |
vocab: list[int] = None,
|
36 |
gamma: float = 0.5,
|
37 |
delta: float = 2.0,
|
38 |
-
seeding_scheme: str = "markov_1", # mostly unused/always default
|
39 |
hash_key: int = 15485863, # just a large prime number to create a rng seed with sufficient bit width
|
40 |
select_green_tokens: bool = True,
|
41 |
):
|
@@ -56,7 +56,7 @@ class WatermarkBase:
|
|
56 |
if seeding_scheme is None:
|
57 |
seeding_scheme = self.seeding_scheme
|
58 |
|
59 |
-
if seeding_scheme == "markov_1":
|
60 |
assert input_ids.shape[-1] >= 1, f"seeding_scheme={seeding_scheme} requires at least a 1 token prefix sequence to seed rng"
|
61 |
prev_token = input_ids[-1].item()
|
62 |
self.rng.manual_seed(self.hash_key * prev_token)
|
@@ -138,7 +138,7 @@ class WatermarkDetector(WatermarkBase):
|
|
138 |
self.z_threshold = z_threshold
|
139 |
self.rng = torch.Generator(device=self.device)
|
140 |
|
141 |
-
if self.seeding_scheme == "markov_1":
|
142 |
self.min_prefix_len = 1
|
143 |
else:
|
144 |
raise NotImplementedError(f"Unexpected seeding_scheme: {self.seeding_scheme}")
|
@@ -149,7 +149,7 @@ class WatermarkDetector(WatermarkBase):
|
|
149 |
|
150 |
self.ignore_repeated_bigrams = ignore_repeated_bigrams
|
151 |
if self.ignore_repeated_bigrams:
|
152 |
-
assert self.seeding_scheme == "markov_1", "No repeated bigram credit variant assumes the single token seeding scheme."
|
153 |
|
154 |
|
155 |
def _compute_z_score(self, observed_count, T):
|
|
|
35 |
vocab: list[int] = None,
|
36 |
gamma: float = 0.5,
|
37 |
delta: float = 2.0,
|
38 |
+
seeding_scheme: str = "simple_1", # mostly unused/always default
|
39 |
hash_key: int = 15485863, # just a large prime number to create a rng seed with sufficient bit width
|
40 |
select_green_tokens: bool = True,
|
41 |
):
|
|
|
56 |
if seeding_scheme is None:
|
57 |
seeding_scheme = self.seeding_scheme
|
58 |
|
59 |
+
if seeding_scheme == "simple_1":
|
60 |
assert input_ids.shape[-1] >= 1, f"seeding_scheme={seeding_scheme} requires at least a 1 token prefix sequence to seed rng"
|
61 |
prev_token = input_ids[-1].item()
|
62 |
self.rng.manual_seed(self.hash_key * prev_token)
|
|
|
138 |
self.z_threshold = z_threshold
|
139 |
self.rng = torch.Generator(device=self.device)
|
140 |
|
141 |
+
if self.seeding_scheme == "simple_1":
|
142 |
self.min_prefix_len = 1
|
143 |
else:
|
144 |
raise NotImplementedError(f"Unexpected seeding_scheme: {self.seeding_scheme}")
|
|
|
149 |
|
150 |
self.ignore_repeated_bigrams = ignore_repeated_bigrams
|
151 |
if self.ignore_repeated_bigrams:
|
152 |
+
assert self.seeding_scheme == "simple_1", "No repeated bigram credit variant assumes the single token seeding scheme."
|
153 |
|
154 |
|
155 |
def _compute_z_score(self, observed_count, T):
|