khulnasoft committed on
Commit
e4f8f7f
1 Parent(s): 27dc1ec

Create config.json

Files changed (1)
  config.json  +24 −23
config.json CHANGED
@@ -1,27 +1,28 @@
 {
-  "_name_or_path": "/checkpoint/dpf/models/cm-1.3B-hf",
-  "activation_dropout": 0.0,
-  "activation_function": "gelu",
   "architectures": [
-    "XGLMForCausalLM"
+    "LlamaForCausalLM"
   ],
-  "attention_dropout": 0.1,
-  "attention_heads": 32,
-  "bos_token_id": 0,
-  "d_model": 2048,
-  "decoder_start_token_id": 2,
-  "dropout": 0.1,
-  "eos_token_id": 2,
-  "ffn_dim": 8192,
-  "init_std": 0.02,
-  "layerdrop": 0.0,
-  "max_position_embeddings": 2048,
-  "model_type": "xglm",
-  "num_layers": 24,
-  "pad_token_id": 1,
-  "scale_embedding": true,
-  "torch_dtype": "float16",
-  "transformers_version": "4.18.0.dev0",
+  "bos_token_id": 32013,
+  "eos_token_id": 32021,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 5504,
+  "max_position_embeddings": 16384,
+  "model_type": "llama",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 16,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": {
+    "factor": 4.0,
+    "type": "linear"
+  },
+  "rope_theta": 100000,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.33.1",
   "use_cache": true,
-  "vocab_size": 50518
-}
+  "vocab_size": 32256
+}
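
The added file replaces the XGLM config with a Llama-architecture config that uses linear RoPE scaling (factor 4.0) under a 16384-token position limit. As a sanity check, here is a minimal sketch (not part of the commit) that rebuilds the added values as a transformers LlamaConfig and prints a couple of derived quantities; it assumes transformers >= 4.33 is installed, and it omits the bookkeeping fields torch_dtype and transformers_version.

    # Minimal sketch: reconstruct the new config.json as a LlamaConfig.
    # Field values are copied verbatim from the added file above.
    from transformers import LlamaConfig

    config = LlamaConfig(
        vocab_size=32256,
        hidden_size=2048,
        intermediate_size=5504,
        num_hidden_layers=24,
        num_attention_heads=16,
        num_key_value_heads=16,
        hidden_act="silu",
        max_position_embeddings=16384,
        initializer_range=0.02,
        rms_norm_eps=1e-06,
        pretraining_tp=1,
        use_cache=True,
        bos_token_id=32013,
        eos_token_id=32021,
        tie_word_embeddings=False,
        rope_theta=100000,
        rope_scaling={"type": "linear", "factor": 4.0},
    )

    # Derived quantities: per-head dimension, and the pre-scaling context
    # window implied by dividing the position limit by the linear factor.
    head_dim = config.hidden_size // config.num_attention_heads                       # 128
    base_ctx = int(config.max_position_embeddings / config.rope_scaling["factor"])    # 4096
    print(config.model_type, head_dim, base_ctx)  # llama 128 4096

The pairing of rope_scaling (factor 4.0) with max_position_embeddings 16384 is consistent with a model whose base context window is 4096 tokens, extended to 16K via linear RoPE interpolation.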