Upload 2 files
- config.json +2 -2
- modeling_aquila.py +7 -5
config.json
CHANGED
@@ -1,12 +1,12 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "qhduan/aquila-7b",
   "architectures": [
     "AquilaForCausalLM"
   ],
   "auto_map": {
     "AutoConfig": "modeling_aquila.LlamaConfig",
     "AutoModel": "modeling_aquila.LlamaModel",
-    "AutoModelForCausalLM": "modeling_aquila.
+    "AutoModelForCausalLM": "modeling_aquila.LlamaForCausalLM"
   },
   "bos_token_id": 1,
   "eos_token_id": 2,
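The auto_map block above routes the Transformers Auto classes to the custom code shipped in modeling_aquila.py, so the checkpoint has to be loaded with trust_remote_code enabled. A minimal loading sketch, assuming the qhduan/aquila-7b repo id taken from "_name_or_path" above and purely illustrative generation settings:

# Minimal loading sketch; not part of this commit. The repo id comes from
# "_name_or_path" in the config above, and trust_remote_code=True is needed
# because auto_map points the Auto classes at modeling_aquila.py in the repo.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "qhduan/aquila-7b"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)

inputs = tokenizer("Hello", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=32)  # illustrative settings
print(tokenizer.decode(outputs[0], skip_special_tokens=True))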
modeling_aquila.py
CHANGED
@@ -250,12 +250,14 @@ class LlamaAttention(nn.Module):
         key_states = self.k_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim)
         value_states = self.v_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
         self.freqs_cis = self.freqs_cis.to(hidden_states.device)
-
-        query_states = query_states.transpose(1, 2)
-        key_states = key_states.transpose(1, 2)
-        kv_seq_len = key_states.shape[-2]
+        kv_seq_len = key_states.shape[-3]
         if past_key_value is not None:
             kv_seq_len += past_key_value[0].shape[-2]
+        query_states, key_states = apply_rotary_pos_emb(
+            query_states, key_states, freqs_cis=self.freqs_cis[kv_seq_len-query_states.shape[1]:kv_seq_len]
+        )
+        query_states = query_states.transpose(1, 2)
+        key_states = key_states.transpose(1, 2)
 
         # query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
         # key_states = self.k_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
@@ -695,7 +697,7 @@ class LlamaModel(LlamaPreTrainedModel):
         )
 
 
-class
+class LlamaForCausalLM(LlamaPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
         self.model = LlamaModel(config)
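For context on the attention change above: the patch applies rotary position embeddings while query/key states are still laid out as (bsz, seq_len, num_heads, head_dim), which is why the sequence length is read from shape[-3] rather than shape[-2] and the transpose(1, 2) calls move after apply_rotary_pos_emb. The following is a self-contained sketch of that convention, modeled on the original LLaMA-style complex freqs_cis helpers, not the exact implementation inside modeling_aquila.py:

# Sketch of the assumed rotary-embedding convention; the helpers below are
# stand-ins, not the code shipped in this repo.
import torch

def precompute_freqs_cis(head_dim: int, max_seq_len: int, theta: float = 10000.0) -> torch.Tensor:
    # One complex rotation factor per (position, head_dim // 2) pair.
    freqs = 1.0 / (theta ** (torch.arange(0, head_dim, 2).float() / head_dim))
    t = torch.arange(max_seq_len).float()
    freqs = torch.outer(t, freqs)                      # (max_seq_len, head_dim // 2)
    return torch.polar(torch.ones_like(freqs), freqs)  # complex64

def apply_rotary_pos_emb(xq: torch.Tensor, xk: torch.Tensor, freqs_cis: torch.Tensor):
    # xq, xk: (bsz, seq_len, n_heads, head_dim); freqs_cis: (seq_len, head_dim // 2)
    xq_ = torch.view_as_complex(xq.float().reshape(*xq.shape[:-1], -1, 2))
    xk_ = torch.view_as_complex(xk.float().reshape(*xk.shape[:-1], -1, 2))
    freqs_cis = freqs_cis.view(1, xq_.shape[1], 1, xq_.shape[-1])  # broadcast over batch and heads
    xq_out = torch.view_as_real(xq_ * freqs_cis).flatten(3)
    xk_out = torch.view_as_real(xk_ * freqs_cis).flatten(3)
    return xq_out.type_as(xq), xk_out.type_as(xk)

# Shape check mirroring the patched attention code: with the heads dimension
# still at index 2, the sequence length lives at key_states.shape[-3].
bsz, seq_len, n_heads, head_dim = 1, 8, 4, 32
q = torch.randn(bsz, seq_len, n_heads, head_dim)
k = torch.randn(bsz, seq_len, n_heads, head_dim)
freqs_cis = precompute_freqs_cis(head_dim, 2048)
kv_seq_len = k.shape[-3]  # no KV cache in this toy example
q, k = apply_rotary_pos_emb(q, k, freqs_cis=freqs_cis[kv_seq_len - q.shape[1]:kv_seq_len])
q, k = q.transpose(1, 2), k.transpose(1, 2)  # now (bsz, n_heads, seq_len, head_dim)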