mariavilla committed
Commit 5ae13e9
1 Parent(s): ca556b3

Upload MixFormerSequentialForCausalLM

Files changed (3)
  1. config.json +5 -10
  2. generation_config.json +1 -1
  3. pytorch_model.bin +2 -2
config.json CHANGED
@@ -1,35 +1,30 @@
 {
   "_name_or_path": "microsoft/phi-1_5",
   "activation_function": "gelu_new",
-  "architecture": {
-    "block_cls": "parallel",
-    "mixer": {},
-    "mlp": {
-      "mlp_cls": "mlp"
-    }
-  },
   "architectures": [
     "MixFormerSequentialForCausalLM"
   ],
+  "attn_pdrop": 0.0,
   "auto_map": {
     "AutoConfig": "microsoft/phi-1_5--configuration_mixformer_sequential.MixFormerSequentialConfig",
     "AutoModelForCausalLM": "microsoft/phi-1_5--modeling_mixformer_sequential.MixFormerSequentialForCausalLM"
   },
-  "embd_layer": "default",
   "embd_pdrop": 0.0,
+  "flash_rotary": false,
+  "fused_dense": false,
   "initializer_range": 0.02,
   "layer_norm_epsilon": 1e-05,
   "model_type": "mixformer-sequential",
   "n_embd": 2048,
   "n_head": 32,
+  "n_head_kv": null,
   "n_inner": null,
   "n_layer": 24,
   "n_positions": 2048,
-  "phyagi_version": "0.0.4.dev",
   "resid_pdrop": 0.0,
   "rotary_dim": 32,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.33.1",
+  "transformers_version": "4.34.1",
   "vocab_size": 51200
 }
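The auto_map above resolves to custom MixFormer code hosted under microsoft/phi-1_5, so the updated checkpoint still has to be loaded with trust_remote_code=True. A minimal loading sketch, assuming a hypothetical repo id (this page does not show the repository name) and transformers >= 4.34.1:

from transformers import AutoConfig, AutoModelForCausalLM

repo_id = "your-username/your-phi-1_5-model"  # hypothetical; substitute the actual repo id

# trust_remote_code is needed because auto_map points at
# configuration_mixformer_sequential.py / modeling_mixformer_sequential.py
# in microsoft/phi-1_5 rather than at classes built into transformers.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)

print(type(model).__name__)            # MixFormerSequentialForCausalLM
print(config.n_layer, config.n_embd)   # 24 2048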
generation_config.json CHANGED
@@ -1,4 +1,4 @@
 {
   "_from_model_config": true,
-  "transformers_version": "4.33.1"
+  "transformers_version": "4.34.1"
 }
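generation_config.json changes only in the recorded transformers version. It can be inspected on its own, e.g. with the same hypothetical repo id as above:

from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("your-username/your-phi-1_5-model")  # hypothetical repo id
print(gen_config.transformers_version)  # "4.34.1"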
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7db1a628a23bf56cdba7c12063d81436873d78b48ecfc1d09415b344b52ec28a
-size 5673167489
+oid sha256:a3236303f601e7522d6dc0575b39970f7f9c83fd18a0005c360d44a87d24fb0c
+size 5673158870
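The pytorch_model.bin entry is a Git LFS pointer, so only the sha256 oid and byte size change here while the weights themselves live in LFS storage. A quick local integrity check against the new pointer, assuming the file has been downloaded to the working directory:

import hashlib
import os

expected_oid = "a3236303f601e7522d6dc0575b39970f7f9c83fd18a0005c360d44a87d24fb0c"
expected_size = 5673158870
path = "pytorch_model.bin"  # assumed local download path

# Hash in chunks so the ~5.7 GB file never has to fit in memory.
digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

assert os.path.getsize(path) == expected_size, "size does not match the LFS pointer"
assert digest.hexdigest() == expected_oid, "sha256 does not match the LFS pointer"
print("pytorch_model.bin matches the LFS pointer")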