Files changed (3)
  1. README.md +2 -2
  2. perplexity.py +9 -11
  3. requirements.txt +1 -1
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🤗
4
  colorFrom: blue
5
  colorTo: red
6
  sdk: gradio
7
- sdk_version: 3.19.1
8
  app_file: app.py
9
  pinned: false
10
  tags:
@@ -73,7 +73,7 @@ results = perplexity.compute(model_id='gpt2',
73
  print(list(results.keys()))
74
  >>>['perplexities', 'mean_perplexity']
75
  print(round(results["mean_perplexity"], 2))
76
- >>>646.75
77
  print(round(results["perplexities"][0], 2))
78
  >>>32.25
79
  ```
4
  colorFrom: blue
5
  colorTo: red
6
  sdk: gradio
7
+ sdk_version: 3.0.2
8
  app_file: app.py
9
  pinned: false
10
  tags:
73
  print(list(results.keys()))
74
  >>>['perplexities', 'mean_perplexity']
75
  print(round(results["mean_perplexity"], 2))
76
+ >>>646.74
77
  print(round(results["perplexities"][0], 2))
78
  >>>32.25
79
  ```
perplexity.py CHANGED
@@ -63,10 +63,10 @@ Examples:
63
  ... predictions=input_texts) # doctest:+ELLIPSIS
64
  >>> print(list(results.keys()))
65
  ['perplexities', 'mean_perplexity']
66
- >>> print(round(results["mean_perplexity"], 0))
67
- 647.0
68
- >>> print(round(results["perplexities"][0], 0))
69
- 32.0
70
 
71
  Example 2:
72
  >>> from datasets import load_dataset
@@ -100,9 +100,7 @@ class Perplexity(evaluate.Metric):
100
  reference_urls=["https://huggingface.co/docs/transformers/perplexity"],
101
  )
102
 
103
- def _compute(
104
- self, predictions, model_id, batch_size: int = 16, add_start_token: bool = True, device=None, max_length=None
105
- ):
106
 
107
  if device is not None:
108
  assert device in ["gpu", "cpu", "cuda"], "device should be either gpu or cpu."
@@ -128,20 +126,20 @@ class Perplexity(evaluate.Metric):
128
  # assign one of the special tokens to also be the pad token
129
  tokenizer.add_special_tokens({"pad_token": existing_special_tokens[0]})
130
 
131
- if add_start_token and max_length:
132
  # leave room for <BOS> token to be added:
133
  assert (
134
  tokenizer.bos_token is not None
135
  ), "Input model must already have a BOS token if using add_start_token=True. Please use a different model, or set add_start_token=False"
136
- max_tokenized_len = max_length - 1
137
  else:
138
- max_tokenized_len = max_length
139
 
140
  encodings = tokenizer(
141
  predictions,
142
  add_special_tokens=False,
143
  padding=True,
144
- truncation=True if max_tokenized_len else False,
145
  max_length=max_tokenized_len,
146
  return_tensors="pt",
147
  return_attention_mask=True,
63
  ... predictions=input_texts) # doctest:+ELLIPSIS
64
  >>> print(list(results.keys()))
65
  ['perplexities', 'mean_perplexity']
66
+ >>> print(round(results["mean_perplexity"], 2))
67
+ 78.22
68
+ >>> print(round(results["perplexities"][0], 2))
69
+ 11.11
70
 
71
  Example 2:
72
  >>> from datasets import load_dataset
100
  reference_urls=["https://huggingface.co/docs/transformers/perplexity"],
101
  )
102
 
103
+ def _compute(self, predictions, model_id, batch_size: int = 16, add_start_token: bool = True, device=None):
 
 
104
 
105
  if device is not None:
106
  assert device in ["gpu", "cpu", "cuda"], "device should be either gpu or cpu."
126
  # assign one of the special tokens to also be the pad token
127
  tokenizer.add_special_tokens({"pad_token": existing_special_tokens[0]})
128
 
129
+ if add_start_token:
130
  # leave room for <BOS> token to be added:
131
  assert (
132
  tokenizer.bos_token is not None
133
  ), "Input model must already have a BOS token if using add_start_token=True. Please use a different model, or set add_start_token=False"
134
+ max_tokenized_len = model.config.max_length - 1
135
  else:
136
+ max_tokenized_len = model.config.max_length
137
 
138
  encodings = tokenizer(
139
  predictions,
140
  add_special_tokens=False,
141
  padding=True,
142
+ truncation=True,
143
  max_length=max_tokenized_len,
144
  return_tensors="pt",
145
  return_attention_mask=True,
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- git+https://github.com/huggingface/evaluate@a4bdc10c48a450b978d91389a48dbb5297835c7d
2
  torch
3
  torch
4
  transformers
1
+ git+https://github.com/huggingface/evaluate@4487d9d1e65216a36b4aa94e3396a570f44a1525
2
  torch
3
  torch
4
  transformers