stanrom committed on
Commit
07679de
1 Parent(s): c3b6e57

Update tokenization_InternLM_XComposer.py

Browse files
Files changed (1) hide show
  1. tokenization_InternLM_XComposer.py +8 -8
tokenization_InternLM_XComposer.py CHANGED
@@ -63,6 +63,13 @@ class InternLMXComposerTokenizer(PreTrainedTokenizer):
63
  **kwargs,
64
  ):
65
  self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
 
 
 
 
 
 
 
66
  super().__init__(
67
  bos_token=bos_token,
68
  eos_token=eos_token,
@@ -70,15 +77,8 @@ class InternLMXComposerTokenizer(PreTrainedTokenizer):
70
  pad_token=pad_token,
71
  clean_up_tokenization_spaces=clean_up_tokenization_spaces,
72
  **kwargs,
73
- self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs),
74
  )
75
- self.vocab_file = vocab_file
76
- self.add_bos_token = add_bos_token
77
- self.add_eos_token = add_eos_token
78
- self.decode_with_prefix_space = decode_with_prefix_space
79
-
80
- self.sp_model.Load(vocab_file)
81
- self._no_prefix_space_tokens = None
82
 
83
  """ Initialisation"""
84
 
 
63
  **kwargs,
64
  ):
65
  self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
66
+ self.vocab_file = vocab_file
67
+ self.add_bos_token = add_bos_token
68
+ self.add_eos_token = add_eos_token
69
+ self.decode_with_prefix_space = decode_with_prefix_space
70
+ self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
71
+ self.sp_model.Load(vocab_file)
72
+ self._no_prefix_space_tokens = None
73
  super().__init__(
74
  bos_token=bos_token,
75
  eos_token=eos_token,
 
77
  pad_token=pad_token,
78
  clean_up_tokenization_spaces=clean_up_tokenization_spaces,
79
  **kwargs,
 
80
  )
81
+
 
 
 
 
 
 
82
 
83
  """ Initialisation"""
84