{
  "_name_or_path": "Phi-3-small-128k-instruct",
  "architectures": [
    "Phi3SmallForCausalLM"
  ],
  "attention_dropout_prob": 0.0,
  "auto_map": {
    "AutoConfig": "configuration_phi3_small.Phi3SmallConfig",
    "AutoModelForCausalLM": "modeling_phi3_small.Phi3SmallForCausalLM",
    "AutoTokenizer": "tokenization_phi3_small.Phi3SmallTokenizer"
  },
  "blocksparse_block_size": 64,
  "blocksparse_homo_head_pattern": false,
  "blocksparse_num_local_blocks": 16,
  "blocksparse_triton_kernel_block_size": 64,
  "blocksparse_vert_stride": 8,
  "bos_token_id": 100257,
  "dense_attention_every_n_layers": 2,
  "embedding_dropout_prob": 0.1,
  "eos_token_id": 100257,
  "ff_dim_multiplier": null,
  "ff_intermediate_size": 14336,
  "ffn_dropout_prob": 0.1,
  "gegelu_limit": 20.0,
  "gegelu_pad_to_256": true,
  "hidden_act": "gegelu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "max_position_embeddings": 131072,
  "model_type": "phi3small",
  "mup_attn_multiplier": 1.0,
  "mup_embedding_multiplier": 10.0,
  "mup_use_scaling": true,
  "mup_width_multiplier": 8.0,
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "original_max_position_embeddings": 8192,
  "pad_sequence_to_multiple_of_64": true,
  "reorder_and_upcast_attn": false,
  "rope_embedding_base": 1000000,
  "rope_position_scale": 1.0,
  "rope_scaling": {
    "long_factor": [
      1.0,
      1.01,
      1.01,
      1.02,
      1.04,
      1.04,
      1.04,
      1.05,
      1.05,
      1.06,
      1.07,
      1.08,
      1.08,
      1.08,
      1.08,
      1.08,
      1.08,
      1.08,
      1.09,
      1.09,
      1.2,
      2.31,
      3.76,
      9.38,
      10.1,
      10.8,
      18.1,
      25.2,
      25.3,
      26.1,
      26.6,
      30.2,
      33.0,
      41.5,
      44.4,
      44.8,
      50.2,
      51.9,
      59.3,
      62.7,
      66.1,
      66.3,
      85.8,
      89.3,
      90.0,
      99.9,
      107.0,
      110.0,
      111.0,
      117.0,
      118.0,
      121.0,
      122.0,
      127.0,
      127.0,
      128.0,
      128.0,
      128.0,
      128.0,
      128.0,
      128.0,
      129.0,
      129.0,
      129.0
    ],
    "long_mscale": 1.1902380714238083,
    "original_max_position_embeddings": 8192,
    "short_factor": [
      1.02,
      1.02,
      1.05,
      1.05,
      1.06,
      1.08,
      1.08,
      1.08,
      1.08,
      1.12,
      1.1800000000000002,
      1.1900000000000002,
      1.1900000000000002,
      1.2100000000000002,
      1.2300000000000002,
      1.2400000000000002,
      1.2400000000000002,
      1.2500000000000002,
      1.3000000000000003,
      1.3100000000000003,
      1.4600000000000004,
      1.5100000000000005,
      1.7000000000000006,
      1.9300000000000008,
      2.080000000000001,
      2.4399999999999933,
      3.2199999999999767,
      3.4499999999999718,
      3.579999999999969,
      4.669999999999946,
      4.779999999999943,
      5.999999999999917,
      6.009999999999917,
      6.4199999999999084,
      6.619999999999904,
      7.189999999999892,
      7.3099999999998895,
      7.339999999999889,
      7.479999999999886,
      9.749999999999837,
      10.919999999999812,
      11.219999999999805,
      11.749999999999794,
      11.979999999999789,
      13.239999999999762,
      13.579999999999755,
      13.669999999999753,
      13.82999999999975,
      14.009999999999746,
      14.679999999999731,
      14.889999999999727,
      15.769999999999708,
      15.769999999999708,
      15.819999999999707,
      15.839999999999707,
      15.919999999999705,
      16.029999999999703,
      16.12999999999972,
      16.44999999999977,
      16.44999999999977,
      16.77999999999982,
      16.83999999999983,
      16.83999999999983,
      16.889999999999837
    ],
    "short_mscale": 1.0,
    "type": "su"
  },
  "torch_dtype": "bfloat16",
  "transformers_version": "4.38.1",
  "use_cache": true,
  "vocab_size": 100352
}
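
For reference, a minimal sketch of loading this configuration with the Hugging Face transformers library. The repo ID "microsoft/Phi-3-small-128k-instruct" is an assumption based on the "_name_or_path" field above; because the "auto_map" entries route to custom classes defined in the repository (Phi3SmallConfig, Phi3SmallForCausalLM), trust_remote_code=True is required for AutoConfig to resolve them:

    from transformers import AutoConfig

    # Repo ID assumed from the "_name_or_path" field; the custom "auto_map"
    # entries mean the config class lives in the repository itself, so
    # remote code execution must be allowed explicitly.
    config = AutoConfig.from_pretrained(
        "microsoft/Phi-3-small-128k-instruct",
        trust_remote_code=True,
    )

    print(config.max_position_embeddings)           # 131072 (128k context)
    print(config.original_max_position_embeddings)  # 8192 (pre-extension window)
    print(config.num_attention_heads,
          config.num_key_value_heads)               # 32 8 -> GQA, 4 query heads per KV head

The "rope_scaling" block (type "su") is what stretches the original 8,192-token window to 131,072 tokens: "long_factor" and "short_factor" rescale the rotary embedding frequencies per dimension (the short list is used within the original window, the long list beyond it), and "long_mscale" / "short_mscale" apply the corresponding attention-magnitude corrections.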