dqnguyen commited on
Commit
56e2f2b
1 Parent(s): 1175c5d

Upload 5 files

Browse files
Files changed (5) hide show
  1. bpe.codes +0 -0
  2. config.json +27 -0
  3. pytorch_model.bin +3 -0
  4. tokenizer.json +0 -0
  5. vocab.txt +0 -0
bpe.codes ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "RobertaForMaskedLM"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 258,
16
+ "model_type": "roberta",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 1,
20
+ "position_embedding_type": "absolute",
21
+ "tokenizer_class": "PhobertTokenizer",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.26.1",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 64001
27
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ba09eb4c244a5b3a49ad76d52d129ac085b61f5c6287de7f99508b02be589f9
3
+ size 540322347
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab.txt ADDED
The diff for this file is too large to render. See raw diff