tokenizers

Index
A
add_special_tokens() (tokenizers.Tokenizer method)
add_tokens() (tokenizers.Tokenizer method)
AddedToken (class in tokenizers)
attention_mask (tokenizers.Encoding attribute)
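These entries cover extending a tokenizer's vocabulary. A minimal sketch, assuming a tokenizer serialized to a local tokenizer.json (placeholder path) and purely illustrative token strings:

    from tokenizers import Tokenizer, AddedToken

    # Placeholder path: any serialized tokenizer works here.
    tokenizer = Tokenizer.from_file("tokenizer.json")

    # add_tokens() extends the vocabulary with regular tokens; AddedToken gives
    # finer control over matching (here: only match as a whole word).
    tokenizer.add_tokens(["[NEW1]", AddedToken("[NEW2]", single_word=True)])

    # add_special_tokens() registers tokens that decode() can skip.
    tokenizer.add_special_tokens([AddedToken("[MARKER]", lstrip=True)])

    encoding = tokenizer.encode("hello [NEW2] world")
    print(encoding.tokens)
    print(encoding.attention_mask)  # 1 for every non-padding position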
C
char_to_token() (tokenizers.Encoding method)
char_to_word() (tokenizers.Encoding method)
content (tokenizers.AddedToken attribute)
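char_to_token() and char_to_word() map a position in the original text back to the token and word that cover it; content is the text of an AddedToken. A sketch under the same placeholder-path assumption:

    from tokenizers import Tokenizer, AddedToken

    tokenizer = Tokenizer.from_file("tokenizer.json")  # placeholder path
    encoding = tokenizer.encode("Hello world")

    # Character 6 is the 'w' of "world".
    print(encoding.char_to_token(6))  # index of the token covering that character
    print(encoding.char_to_word(6))   # index of the word covering that character

    print(AddedToken("[MARKER]").content)  # "[MARKER]"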
D
decode() (tokenizers.Tokenizer method)
decode_batch() (tokenizers.Tokenizer method)
decoder (tokenizers.Tokenizer attribute)
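decode() and decode_batch() turn ids back into text, using the Decoder component stored in the decoder attribute. A sketch, assuming the same placeholder tokenizer.json:

    from tokenizers import Tokenizer

    tokenizer = Tokenizer.from_file("tokenizer.json")  # placeholder path
    ids = tokenizer.encode("Hello world").ids

    # Special tokens are skipped by default.
    print(tokenizer.decode(ids))
    print(tokenizer.decode(ids, skip_special_tokens=False))

    # decode_batch() handles several sequences at once.
    print(tokenizer.decode_batch([ids, ids]))

    # The decoder attribute holds the Decoder used to merge tokens back into text.
    print(tokenizer.decoder)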
E
enable_padding() (tokenizers.Tokenizer method)
enable_truncation() (tokenizers.Tokenizer method)
encode() (tokenizers.Tokenizer method)
encode_batch() (tokenizers.Tokenizer method)
EncodeInput (in module tokenizers)
Encoding (class in tokenizers)
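These entries form the core encoding API: enable_padding() and enable_truncation() configure batch behaviour, encode() handles one sequence (optionally with a pair), and encode_batch() handles many. A sketch with a placeholder tokenizer and an assumed [PAD] token:

    from tokenizers import Tokenizer

    tokenizer = Tokenizer.from_file("tokenizer.json")  # placeholder path

    # Pad every sequence in a batch to the longest one and cap the length at 8.
    tokenizer.enable_padding(pad_id=0, pad_token="[PAD]")
    tokenizer.enable_truncation(max_length=8)

    single = tokenizer.encode("Hello world")
    pair = tokenizer.encode("Hello world", "How are you?")
    print(single.tokens, pair.tokens)

    # encode_batch() accepts plain strings, or (sequence, pair) tuples.
    batch = tokenizer.encode_batch(["Hello world", "How are you?"])
    pairs = tokenizer.encode_batch([("Hello", "world"), ("How are", "you?")])
    print([e.tokens for e in batch])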
F
from_buffer() (tokenizers.Tokenizer static method)
from_file() (tokenizers.Tokenizer static method)
from_str() (tokenizers.Tokenizer static method)
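from_file(), from_str() and from_buffer() are the three ways to reload a serialized tokenizer. A sketch, with tokenizer.json as a placeholder path:

    from tokenizers import Tokenizer

    # From a JSON file on disk.
    tokenizer = Tokenizer.from_file("tokenizer.json")

    # From the JSON string representation.
    same = Tokenizer.from_str(tokenizer.to_str())

    # From an in-memory bytes buffer.
    with open("tokenizer.json", "rb") as f:
        also_same = Tokenizer.from_buffer(f.read())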
G
get_vocab() (tokenizers.Tokenizer method)
get_vocab_size() (tokenizers.Tokenizer method)
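get_vocab() and get_vocab_size() expose the vocabulary, optionally including added tokens. Sketch under the same placeholder-path assumption:

    from tokenizers import Tokenizer

    tokenizer = Tokenizer.from_file("tokenizer.json")  # placeholder path

    # Token string -> id mapping, including tokens from add_tokens()/add_special_tokens().
    vocab = tokenizer.get_vocab(with_added_tokens=True)
    print(len(vocab))

    # The matching count.
    print(tokenizer.get_vocab_size(with_added_tokens=True))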
I
id_to_token() (tokenizers.Tokenizer method)
ids (tokenizers.Encoding attribute)
InputSequence (in module tokenizers)
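id_to_token() is the inverse vocabulary lookup, and Encoding.ids is the list of ids produced for an input. Sketch:

    from tokenizers import Tokenizer

    tokenizer = Tokenizer.from_file("tokenizer.json")  # placeholder path
    encoding = tokenizer.encode("Hello world")

    print(encoding.ids)
    # id_to_token() returns None for ids outside the vocabulary.
    print([tokenizer.id_to_token(i) for i in encoding.ids])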
L
lstrip (tokenizers.AddedToken attribute)
M
merge() (tokenizers.Encoding static method)
model (tokenizers.Tokenizer attribute)
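Encoding.merge() concatenates several encodings into one, and the model attribute exposes the underlying Model (BPE, WordPiece, ...). Sketch under the placeholder-path assumption:

    from tokenizers import Tokenizer, Encoding

    tokenizer = Tokenizer.from_file("tokenizer.json")  # placeholder path

    parts = tokenizer.encode_batch(["Hello world", "How are you?"])
    merged = Encoding.merge(parts)
    print(merged.tokens)

    print(tokenizer.model)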
N
n_sequences (tokenizers.Encoding attribute)
no_padding() (tokenizers.Tokenizer method)
no_truncation() (tokenizers.Tokenizer method)
normalized (tokenizers.AddedToken attribute)
normalizer (tokenizers.Tokenizer attribute)
num_special_tokens_to_add() (tokenizers.Tokenizer method)
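no_padding() and no_truncation() undo the corresponding enable_* calls, num_special_tokens_to_add() reports how many special tokens the post-processor will insert, and n_sequences counts the sequences behind an Encoding. Sketch with the same placeholder tokenizer and an assumed [PAD] token:

    from tokenizers import Tokenizer

    tokenizer = Tokenizer.from_file("tokenizer.json")  # placeholder path

    # Special tokens added for a pair of sequences (True) vs. a single one (False).
    print(tokenizer.num_special_tokens_to_add(True))

    tokenizer.enable_padding(pad_id=0, pad_token="[PAD]")
    tokenizer.no_padding()
    tokenizer.enable_truncation(max_length=8)
    tokenizer.no_truncation()

    # An encoding built from a pair reports two sequences.
    print(tokenizer.encode("Hello", "world").n_sequences)

    # The normalizer attribute holds the Normalizer component, if any.
    print(tokenizer.normalizer)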
O
offsets (tokenizers.Encoding attribute)
overflowing (tokenizers.Encoding attribute)
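offsets ties each token back to a character span of the original text, and overflowing collects the extra encodings produced when truncation cuts an input. Sketch:

    from tokenizers import Tokenizer

    tokenizer = Tokenizer.from_file("tokenizer.json")  # placeholder path

    text = "Hello world"
    encoding = tokenizer.encode(text)
    for token, (start, end) in zip(encoding.tokens, encoding.offsets):
        print(token, repr(text[start:end]))

    # With truncation on, everything that did not fit ends up in overflowing,
    # split into extra Encoding objects that overlap by `stride` tokens.
    tokenizer.enable_truncation(max_length=4, stride=1)
    long_enc = tokenizer.encode("This is a longer sentence that will not fit")
    print(len(long_enc.overflowing))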
P
pad() (tokenizers.Encoding method)
padding (tokenizers.Tokenizer attribute)
post_process() (tokenizers.Tokenizer method)
post_processor (tokenizers.Tokenizer attribute)
pre_tokenizer (tokenizers.Tokenizer attribute)
PreTokenizedEncodeInput (in module tokenizers)
PreTokenizedInputSequence (in module tokenizers)
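post_process() applies truncation and the post-processor (the component stored in post_processor) to an encoding, Encoding.pad() pads a single encoding, and padding/pre_tokenizer expose the tokenizer's current configuration and pipeline components. A sketch that assumes the placeholder tokenizer has a post-processor and a [PAD] token:

    from tokenizers import Tokenizer

    tokenizer = Tokenizer.from_file("tokenizer.json")  # placeholder path

    # Encode without special tokens, then add them explicitly via post_process().
    raw = tokenizer.encode("Hello world", add_special_tokens=False)
    processed = tokenizer.post_process(raw)

    # Pad this single encoding to a fixed length of 10.
    processed.pad(10, pad_id=0, pad_token="[PAD]")
    print(processed.tokens)

    # padding is None until enable_padding() is called; the other attributes
    # expose the PreTokenizer and PostProcessor components.
    print(tokenizer.padding)
    print(tokenizer.pre_tokenizer, tokenizer.post_processor)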
R
rstrip (tokenizers.AddedToken attribute)
S
save() (tokenizers.Tokenizer method)
sequence_ids (tokenizers.Encoding attribute)
set_sequence_id() (tokenizers.Encoding method)
single_word (tokenizers.AddedToken attribute)
special_tokens_mask (tokenizers.Encoding attribute)
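save() serializes the whole tokenizer to a JSON file, while sequence_ids and special_tokens_mask describe where each token of an Encoding comes from. Sketch with the placeholder tokenizer:

    from tokenizers import Tokenizer

    tokenizer = Tokenizer.from_file("tokenizer.json")  # placeholder path
    encoding = tokenizer.encode("Hello world", "How are you?")

    # 0 or 1 for tokens of the first/second sequence, None for special tokens.
    print(encoding.sequence_ids)

    # 1 for special tokens, 0 for regular ones.
    print(encoding.special_tokens_mask)

    # Writes vocabulary, added tokens and configuration to one file.
    tokenizer.save("my-tokenizer.json", pretty=True)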
T
TextEncodeInput (in module tokenizers)
TextInputSequence (in module tokenizers)
to_str() (tokenizers.Tokenizer method)
token_to_chars() (tokenizers.Encoding method)
token_to_id() (tokenizers.Tokenizer method)
token_to_sequence() (tokenizers.Encoding method)
token_to_word() (tokenizers.Encoding method)
Tokenizer (class in tokenizers)
tokens (tokenizers.Encoding attribute)
truncate() (tokenizers.Encoding method)
truncation (tokenizers.Tokenizer attribute)
type_ids (tokenizers.Encoding attribute)
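The Tokenizer class ties everything together. The sketch below builds a tiny word-level tokenizer from an explicit vocabulary (purely illustrative, not a recommended setup) to show token_to_id(), token_to_chars(), token_to_word(), truncate(), type_ids and to_str():

    from tokenizers import Tokenizer
    from tokenizers.models import WordLevel
    from tokenizers.pre_tokenizers import Whitespace

    # Illustrative three-entry vocabulary.
    vocab = {"[UNK]": 0, "hello": 1, "world": 2}
    tokenizer = Tokenizer(WordLevel(vocab, unk_token="[UNK]"))
    tokenizer.pre_tokenizer = Whitespace()

    encoding = tokenizer.encode("hello world")
    print(encoding.tokens)    # ['hello', 'world']
    print(encoding.type_ids)  # all 0 for a single sequence

    # Map between the different views of the same data.
    print(tokenizer.token_to_id("hello"))  # 1
    print(encoding.token_to_chars(1))      # character span of the second token
    print(encoding.token_to_word(1))       # word index of the second token

    # truncate() shortens an existing Encoding in place; truncation holds the
    # tokenizer-level settings (None unless enable_truncation() was called).
    encoding.truncate(1)
    print(encoding.tokens)  # ['hello']
    print(tokenizer.truncation)

    # to_str() serializes the whole tokenizer to JSON.
    print(tokenizer.to_str(pretty=True)[:80])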
W
word_ids (tokenizers.Encoding attribute)
word_to_chars() (tokenizers.Encoding method)
word_to_tokens() (tokenizers.Encoding method)
words (tokenizers.Encoding attribute)
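word_ids (and its older alias words) maps each token to the word it came from, while word_to_tokens() and word_to_chars() go the other way. A sketch assuming the placeholder tokenizer has a pre-tokenizer, so word indices are available:

    from tokenizers import Tokenizer

    tokenizer = Tokenizer.from_file("tokenizer.json")  # placeholder path
    text = "Hello world"
    encoding = tokenizer.encode(text)

    # One word index per token, None for special tokens.
    print(encoding.word_ids)

    # Token span (first, last + 1) and character span of word 1 ("world").
    first, last = encoding.word_to_tokens(1)
    print(encoding.tokens[first:last])
    start, end = encoding.word_to_chars(1)
    print(text[start:end])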