mxfeng committed on
Commit
d56a020
1 Parent(s): cfe17fd

commit from mxfeng

Browse files
Files changed (5) hide show
  1. config.json +32 -0
  2. merges.txt +0 -0
  3. pytorch_model.bin +3 -0
  4. training.log +0 -0
  5. vocab.json +0 -0
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/opt-350m",
3
+ "_remove_final_layer_norm": false,
4
+ "activation_dropout": 0.0,
5
+ "activation_function": "relu",
6
+ "architectures": [
7
+ "OPTForCausalLM"
8
+ ],
9
+ "attention_dropout": 0.0,
10
+ "bos_token_id": 2,
11
+ "do_layer_norm_before": false,
12
+ "dropout": 0.0,
13
+ "enable_bias": true,
14
+ "end_token_id": 2,
15
+ "eos_token_id": 2,
16
+ "ffn_dim": 4096,
17
+ "hidden_size": 1024,
18
+ "init_std": 0.02,
19
+ "layer_norm_elementwise_affine": true,
20
+ "layerdrop": 0.0,
21
+ "max_position_embeddings": 2048,
22
+ "model_type": "opt",
23
+ "num_attention_heads": 16,
24
+ "num_hidden_layers": 24,
25
+ "pad_token_id": 2,
26
+ "prefix": "</s>",
27
+ "torch_dtype": "float16",
28
+ "transformers_version": "4.30.0",
29
+ "use_cache": true,
30
+ "vocab_size": 50272,
31
+ "word_embed_proj_dim": 512
32
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c5a4f0ac03942384449a0edb2655deee50d9e7903bb1d81c3303478fc734481
3
+ size 662469311
training.log ADDED
The diff for this file is too large to render. See raw diff
 
vocab.json ADDED
The diff for this file is too large to render. See raw diff