GuoPD committed on
Commit
262c8cb
1 Parent(s): 12f25aa

remove useless code

Browse files
Files changed (1) hide show
  1. modeling_baichuan.py +0 -7
modeling_baichuan.py CHANGED
@@ -171,9 +171,6 @@ class Attention(nn.Module):
171
  f"hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size}"
172
  f" and `num_heads`: {self.num_heads})."
173
  )
174
- # self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False)
175
- # self.k_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False)
176
- # self.v_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False)
177
  self.W_pack = nn.Linear(self.hidden_size, 3 * self.hidden_size, bias=False)
178
  self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=False)
179
  self.rotary_emb = RotaryEmbedding(self.head_dim, max_position_embeddings=self.max_position_embeddings)
@@ -201,10 +198,6 @@ class Attention(nn.Module):
201
  value_states = proj[2].view(bsz, q_len, self.num_heads, self.head_dim).transpose(1,
202
  2) # batch_size x source_len x hidden_size
203
 
204
- # query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
205
- # key_states = self.k_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
206
- # value_states = self.v_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
207
-
208
  kv_seq_len = key_states.shape[-2]
209
  if past_key_value is not None:
210
  kv_seq_len += past_key_value[0].shape[-2]
 
171
  f"hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size}"
172
  f" and `num_heads`: {self.num_heads})."
173
  )
 
 
 
174
  self.W_pack = nn.Linear(self.hidden_size, 3 * self.hidden_size, bias=False)
175
  self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=False)
176
  self.rotary_emb = RotaryEmbedding(self.head_dim, max_position_embeddings=self.max_position_embeddings)
 
198
  value_states = proj[2].view(bsz, q_len, self.num_heads, self.head_dim).transpose(1,
199
  2) # batch_size x source_len x hidden_size
200
 
 
 
 
 
201
  kv_seq_len = key_states.shape[-2]
202
  if past_key_value is not None:
203
  kv_seq_len += past_key_value[0].shape[-2]