mmhh888 committed on
Commit
ea756ed
1 Parent(s): 8890eed

Update bert_token_handler.py

Browse files
Files changed (1) hide show
  1. bert_token_handler.py +2 -2
bert_token_handler.py CHANGED
@@ -12,14 +12,14 @@ sents = ['选择珠江花园的原因就是方便。',
12
  '今天才知道这书还有第6卷,真有点郁闷。',
13
  '机器背面似平被撕了张什么标签,残胶还在。']
14
 
15
- -- simple clause enc & dec
16
  enc_out = tokenizer.encode(text=sents[0], text_pair=sents[1], truncation=True, padding='max_length', add_special_tokens=True, max_length=30, return_tensors=None)
17
  print(enc_out)
18
  dec_out = tokenizer.decode(enc_out)
19
  print(dec_out)
20
 
21
 
22
- -- add new token into dict
23
  all_dict = tokenizer.get_vocab()
24
  print('dict length:', len(all_dict))
25
  print('月光' in all_dict)
 
12
  '今天才知道这书还有第6卷,真有点郁闷。',
13
  '机器背面似平被撕了张什么标签,残胶还在。']
14
 
15
+ # simple clause enc & dec
16
  enc_out = tokenizer.encode(text=sents[0], text_pair=sents[1], truncation=True, padding='max_length', add_special_tokens=True, max_length=30, return_tensors=None)
17
  print(enc_out)
18
  dec_out = tokenizer.decode(enc_out)
19
  print(dec_out)
20
 
21
 
22
+ # add new token into dict
23
  all_dict = tokenizer.get_vocab()
24
  print('dict length:', len(all_dict))
25
  print('月光' in all_dict)