Spaces:
Running
Running
Update Space (evaluate main: 9f0f888e)
Browse files
- perplexity.py +7 -5
- requirements.txt +1 -1
perplexity.py
CHANGED
|
@@ -100,7 +100,9 @@ class Perplexity(evaluate.Metric):
|
|
| 100 |
reference_urls=["https://huggingface.co/docs/transformers/perplexity"],
|
| 101 |
)
|
| 102 |
|
| 103 |
-
def _compute(
|
|
|
|
|
|
|
| 104 |
|
| 105 |
if device is not None:
|
| 106 |
assert device in ["gpu", "cpu", "cuda"], "device should be either gpu or cpu."
|
|
@@ -126,20 +128,20 @@ class Perplexity(evaluate.Metric):
|
|
| 126 |
# assign one of the special tokens to also be the pad token
|
| 127 |
tokenizer.add_special_tokens({"pad_token": existing_special_tokens[0]})
|
| 128 |
|
| 129 |
-
if add_start_token:
|
| 130 |
# leave room for <BOS> token to be added:
|
| 131 |
assert (
|
| 132 |
tokenizer.bos_token is not None
|
| 133 |
), "Input model must already have a BOS token if using add_start_token=True. Please use a different model, or set add_start_token=False"
|
| 134 |
-
max_tokenized_len =
|
| 135 |
else:
|
| 136 |
-
max_tokenized_len =
|
| 137 |
|
| 138 |
encodings = tokenizer(
|
| 139 |
predictions,
|
| 140 |
add_special_tokens=False,
|
| 141 |
padding=True,
|
| 142 |
-
truncation=True,
|
| 143 |
max_length=max_tokenized_len,
|
| 144 |
return_tensors="pt",
|
| 145 |
return_attention_mask=True,
|
|
|
|
| 100 |
reference_urls=["https://huggingface.co/docs/transformers/perplexity"],
|
| 101 |
)
|
| 102 |
|
| 103 |
+
def _compute(
|
| 104 |
+
self, predictions, model_id, batch_size: int = 16, add_start_token: bool = True, device=None, max_length=None
|
| 105 |
+
):
|
| 106 |
|
| 107 |
if device is not None:
|
| 108 |
assert device in ["gpu", "cpu", "cuda"], "device should be either gpu or cpu."
|
|
|
|
| 128 |
# assign one of the special tokens to also be the pad token
|
| 129 |
tokenizer.add_special_tokens({"pad_token": existing_special_tokens[0]})
|
| 130 |
|
| 131 |
+
if add_start_token and max_length:
|
| 132 |
# leave room for <BOS> token to be added:
|
| 133 |
assert (
|
| 134 |
tokenizer.bos_token is not None
|
| 135 |
), "Input model must already have a BOS token if using add_start_token=True. Please use a different model, or set add_start_token=False"
|
| 136 |
+
max_tokenized_len = max_length - 1
|
| 137 |
else:
|
| 138 |
+
max_tokenized_len = max_length
|
| 139 |
|
| 140 |
encodings = tokenizer(
|
| 141 |
predictions,
|
| 142 |
add_special_tokens=False,
|
| 143 |
padding=True,
|
| 144 |
+
truncation=True if max_tokenized_len else False,
|
| 145 |
max_length=max_tokenized_len,
|
| 146 |
return_tensors="pt",
|
| 147 |
return_attention_mask=True,
|
requirements.txt
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
git+https://github.com/huggingface/evaluate@
|
| 2 |
torch
|
| 3 |
torch
|
| 4 |
transformers
|
|
|
|
| 1 |
+
git+https://github.com/huggingface/evaluate@9f0f888eb455bc0952f467b1cab47716e3f04e83
|
| 2 |
torch
|
| 3 |
torch
|
| 4 |
transformers
|