Shaltiel committed on
Commit
f277809
1 Parent(s): 0bd232a

Fixed special tokens map to not ignore unk

Browse files
Files changed (1) hide show
  1. BertForMorphTagging.py +1 -1
BertForMorphTagging.py CHANGED
@@ -157,7 +157,7 @@ class BertForMorphTagging(BertPreTrainedModel):
157
  # for each sentence, return a dict object with the following fields { text, tokens }
158
  # Where tokens is a list of dicts, where each dict is:
159
  # { pos: str, feats: dict, prefixes: List[str], suffix: str | bool, suffix_feats: dict | None}
160
- special_tokens = set(tokenizer.special_tokens_map.values())
161
  ret = []
162
  for sent_idx,sentence in enumerate(sentences):
163
  input_id_strs = tokenizer.convert_ids_to_tokens(inputs['input_ids'][sent_idx])
 
157
  # for each sentence, return a dict object with the following fields { text, tokens }
158
  # Where tokens is a list of dicts, where each dict is:
159
  # { pos: str, feats: dict, prefixes: List[str], suffix: str | bool, suffix_feats: dict | None}
160
+ special_tokens = set([tokenizer.pad_token, tokenizer.cls_token, tokenizer.sep_token])
161
  ret = []
162
  for sent_idx,sentence in enumerate(sentences):
163
  input_id_strs = tokenizer.convert_ids_to_tokens(inputs['input_ids'][sent_idx])