nanocatalyst / tokenizer_v3 /tokenizer_stats.json
everythingchalna's picture
Upload folder using huggingface_hub
35547c6 verified
{
"vocab_size": 181,
"n_sampled": 10000,
"chars_per_token": {
"mean": 1.824003565805524,
"median": 1.8262004175365345
},
"token_length": {
"mean": 1083.7929,
"std": 435.14883937520733,
"min": 176,
"max": 3214,
"p50": 1020.0,
"p95": 1929.0,
"p99": 2462.0
},
"coverage_by_seq_len": {
"512": 4.98,
"768": 22.01,
"1024": 51.19,
"1536": 87.63,
"2048": 96.26,
"3072": 99.93,
"4096": 100.0
}
}