File size: 688 Bytes
d76004b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c3adeb
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
{
  "tokenizer_name": "CSUMLM Tokenizer",
  "model_name": "CSUMLM",
  "description": "Tokenizer for the CognoSphere Unified Multimodal Language Model",
  "author": "Or4cl3 AI Solutions",
  "language": "Multimodal (Text, Image, Audio)",
  "vocab_size": 32000,
  "max_sequence_length": 512,
  "special_tokens": {
    "bos_token": "<BOS>",
    "eos_token": "<EOS>",
    "pad_token": "<PAD>",
    "unk_token": "<UNK>",
    "mask_token": "<MASK>"
  },
  "tokenization_method": "Byte Pair Encoding (BPE)",
  "training_data": "Custom 1500 Example Dataset",
  "chat_template": "[BOS] {context} {user_input} {response} [EOS]",
  "pad_to_max_length": true,
  "truncation_strategy": "only_second"
}