helboukkouri committed on
Commit
e4a209d
1 Parent(s): 0579dcd

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "helboukkouri/character-bert",
3
+ "architectures": [
4
+ "CharacterBertForPreTraining"
5
+ ],
6
+ "auto_map": {
7
+ "AutoConfig": "configuration_character_bert.CharacterBertConfig",
8
+ "AutoModel": "modeling_character_bert.CharacterBertForPreTraining",
9
+ "AutoModelForMaskedLM": "modeling_character_bert.CharacterBertForMaskedLM"
10
+ },
11
+ "attention_probs_dropout_prob": 0.1,
12
+ "character_embeddings_dim": 16,
13
+ "cnn_activation": "relu",
14
+ "cnn_filters": [
15
+ [
16
+ 1,
17
+ 32
18
+ ],
19
+ [
20
+ 2,
21
+ 32
22
+ ],
23
+ [
24
+ 3,
25
+ 64
26
+ ],
27
+ [
28
+ 4,
29
+ 128
30
+ ],
31
+ [
32
+ 5,
33
+ 256
34
+ ],
35
+ [
36
+ 6,
37
+ 512
38
+ ],
39
+ [
40
+ 7,
41
+ 1024
42
+ ]
43
+ ],
44
+ "hidden_act": "gelu",
45
+ "hidden_dropout_prob": 0.1,
46
+ "hidden_size": 768,
47
+ "initializer_range": 0.02,
48
+ "intermediate_size": 3072,
49
+ "layer_norm_eps": 1e-12,
50
+ "max_position_embeddings": 512,
51
+ "max_word_length": 50,
52
+ "mlm_vocab_size": 100000,
53
+ "model_type": "character_bert",
54
+ "num_attention_heads": 12,
55
+ "num_hidden_layers": 12,
56
+ "num_highway_layers": 2,
57
+ "tie_word_embeddings": false,
58
+ "transformers_version": "4.7.0.dev0",
59
+ "type_vocab_size": 2,
60
+ "use_cache": true
61
+ }
mlm_vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ece0a0396cc31225ee75a56b46ad584dd566206098f218b1f8fc06ae0da55c81
3
+ size 728491415
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"name_or_path": "helboukkouri/character-bert", "tokenizer_class": "CharacterBertTokenizer", "max_word_length": 50, "do_lower_case": true, "do_basic_tokenize": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "auto_map": {"AutoTokenizer": ["tokenization_character_bert.CharacterBertTokenizer", null]}}
vocab.txt ADDED
File without changes