from transformers import BartTokenizer, BartForConditionalGeneration


def main():
    tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
    bart = BartForConditionalGeneration.from_pretrained("facebook/bart-base")

    # register markers for the beginning and end of an idiom as special tokens
    num_added_tokens = tokenizer.add_special_tokens({
        "additional_special_tokens": ["<idiom>", "</idiom>"],
    })
    print(num_added_tokens)
    print(tokenizer.additional_special_tokens)

    # once the new special tokens are added, you should resize the embedding table of your model
    print(bart.model.shared.weight.shape)  # before
    bart.resize_token_embeddings(len(tokenizer))
    print(bart.model.shared.weight.shape)  # after


if __name__ == '__main__':
    main()

"""
2
['<idiom>', '</idiom>']
torch.Size([50265, 768])
torch.Size([50267, 768])

you can see that 2 more embedding vectors have been added here. later, you may
want to save the tokenizer after you add the idiom special tokens.
"""
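
# A minimal sketch of that saving step, assuming you want the extended vocabulary
# back on reload. The directory name "bart-with-idiom-tokens" (like the "<idiom>" /
# "</idiom>" token strings) is an illustrative choice, not fixed by the example above.

def save_and_reload_tokenizer():
    tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
    tokenizer.add_special_tokens({"additional_special_tokens": ["<idiom>", "</idiom>"]})

    # write the tokenizer files (vocab, merges, special-tokens map) to a local directory
    tokenizer.save_pretrained("bart-with-idiom-tokens")

    # reloading from that directory restores the added special tokens,
    # so len(tokenizer) still matches the resized model embedding table
    reloaded = BartTokenizer.from_pretrained("bart-with-idiom-tokens")
    print(reloaded.additional_special_tokens)  # ['<idiom>', '</idiom>']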