efederici committed on
Commit
d6b775b
1 Parent(s): 8ee0f76

Update adapt_tokenizer.py

Browse files
Files changed (1) hide show
  1. adapt_tokenizer.py +4 -5
adapt_tokenizer.py CHANGED
@@ -1,9 +1,8 @@
1
- from typing import Union
2
- from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
3
- Tokenizer = Union[PreTrainedTokenizer, PreTrainedTokenizerFast]
4
  NUM_SENTINEL_TOKENS: int = 100
5
 
6
- def adapt_tokenizer_for_denoising(tokenizer: Tokenizer):
7
  """Adds sentinel tokens and padding token (if missing).
8
 
9
  Expands the tokenizer vocabulary to include sentinel tokens
@@ -34,7 +33,7 @@ class AutoTokenizerForMOD(AutoTokenizer):
34
  """
35
 
36
  @classmethod
37
- def from_pretrained(cls, *args, **kwargs):
38
  """See `AutoTokenizer.from_pretrained` docstring."""
39
  tokenizer = super().from_pretrained(*args, **kwargs)
40
  adapt_tokenizer_for_denoising(tokenizer)
 
1
+ from typing import Any
2
+ from transformers import AutoTokenizer, PreTrainedTokenizerBase
 
3
  NUM_SENTINEL_TOKENS: int = 100
4
 
5
+ def adapt_tokenizer_for_denoising(tokenizer: PreTrainedTokenizerBase) -> None:
6
  """Adds sentinel tokens and padding token (if missing).
7
 
8
  Expands the tokenizer vocabulary to include sentinel tokens
 
33
  """
34
 
35
  @classmethod
36
+ def from_pretrained(cls, *args: Any, **kwargs: Any) -> PreTrainedTokenizerBase:
37
  """See `AutoTokenizer.from_pretrained` docstring."""
38
  tokenizer = super().from_pretrained(*args, **kwargs)
39
  adapt_tokenizer_for_denoising(tokenizer)