keyfan committed on
Commit
c7dfd64
1 Parent(s): c90c4ac

Upload folder using huggingface_hub

Files changed (2)
  1. modeling_qwen.py +2 -9
  2. tokenization_qwen.py +0 -2
modeling_qwen.py CHANGED
@@ -37,7 +37,7 @@ from torch import nn
 SUPPORT_CUDA = torch.cuda.is_available()
 SUPPORT_BF16 = SUPPORT_CUDA and torch.cuda.is_bf16_supported()
 SUPPORT_FP16 = SUPPORT_CUDA and torch.cuda.get_device_capability(0)[0] >= 7
-SUPPORT_TORCH2 = False #hasattr(torch, '__version__') and int(torch.__version__.split(".")[0]) >= 2
+SUPPORT_TORCH2 = hasattr(torch, '__version__') and int(torch.__version__.split(".")[0]) >= 2
 
 
 from .configuration_qwen import QWenConfig
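
The re-enabled probe above is what usually decides whether the attention code can dispatch to PyTorch 2's fused kernel. A minimal sketch of that kind of dispatch, assuming a generic attention helper (the names and the fallback path are illustrative, not the exact code in modeling_qwen.py):

import torch
import torch.nn.functional as F

SUPPORT_TORCH2 = hasattr(torch, "__version__") and int(torch.__version__.split(".")[0]) >= 2

def attention(query, key, value, attention_mask=None):
    # Use the fused PyTorch 2 kernel when the version probe succeeds,
    # otherwise fall back to an explicit matmul + softmax.
    if SUPPORT_TORCH2:
        return F.scaled_dot_product_attention(query, key, value, attn_mask=attention_mask)
    scores = torch.matmul(query, key.transpose(-2, -1)) / (query.size(-1) ** 0.5)
    if attention_mask is not None:
        scores = scores + attention_mask
    return torch.matmul(torch.softmax(scores, dim=-1), value)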
@@ -414,7 +414,6 @@ class QWenAttention(nn.Module):
 use_cache: Optional[bool] = False,
 ):
 mixed_x_layer = self.c_attn(hidden_states)
-#print("mixed out: ", mixed_x_layer)
 
 query, key, value = mixed_x_layer.split(self.split_size, dim=2)
 
@@ -444,7 +443,6 @@ class QWenAttention(nn.Module):
 key_list += [apply_rotary_pos_emb(key[i:i+1, :, :], k_pos_emb)]
 query = torch.cat(query_list, dim=0)
 key = torch.cat(key_list, dim=0)
-#print("query: ", query, "key:", key)
 
 if self.use_cache_quantization:
 key = quantize_cache_v(key.permute(0, 2, 1, 3),
@@ -474,7 +472,7 @@ class QWenAttention(nn.Module):
 # present=(key,value)
 key = torch.cat((past_key, key), dim=1)
 value = torch.cat((past_value, value), dim=1)
-#print("key: ", key, key.size(), value)
+
 if use_cache:
 present = (key, value)
 else:
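
The hunk above sits in the incremental-decoding path: keys/values cached from earlier steps are concatenated with the new step's projections and handed back as present when use_cache is set. A self-contained sketch of that cache update, with hypothetical names (the real method also handles cache quantization and other layouts):

import torch

def update_kv_cache(key, value, layer_past=None, use_cache=True):
    # Append this step's keys/values to the cached ones along the sequence
    # dimension (dim=1, matching the torch.cat calls in the hunk) and return
    # the grown cache so the next decoding step can reuse it.
    if layer_past is not None:
        past_key, past_value = layer_past
        key = torch.cat((past_key, key), dim=1)
        value = torch.cat((past_value, value), dim=1)
    present = (key, value) if use_cache else None
    return key, value, present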
@@ -540,10 +538,8 @@ class QWenAttention(nn.Module):
 context_layer = self._merge_heads(
 attn_output, self.num_heads, self.head_dim
 )
-#print("context: ", context_layer)
 
 attn_output = self.c_proj(context_layer)
-#print("attn: ", attn_output)
 
 outputs = (attn_output, present)
 if output_attentions:
@@ -622,7 +618,6 @@ class QWenBlock(nn.Module):
 use_cache=use_cache,
 output_attentions=output_attentions,
 )
-#print("attn output: ", attn_outputs[0])
 attn_output = attn_outputs[0]
 
 outputs = attn_outputs[1:]
@@ -634,7 +629,6 @@ class QWenBlock(nn.Module):
 
 residual = layernorm_input
 mlp_output = self.mlp(layernorm_output)
-#print("mlp output: ", mlp_output)
 hidden_states = residual + mlp_output
 
 if use_cache:
@@ -909,7 +903,6 @@ class QWenModel(QWenPreTrainedModel):
 )
 
 hidden_states = outputs[0]
-#print(i, hidden_states)
 if use_cache is True:
 presents = presents + (outputs[1],)
 
tokenization_qwen.py CHANGED
@@ -243,8 +243,6 @@ class QWenTokenizer(PreTrainedTokenizer):
 """Converts an id to a token, special tokens included"""
 if index in self.decoder:
 return self.decoder[index]
-print("error index", index)
-return ""
 raise ValueError("unknown ids")
 
 def _convert_token_to_id(self, token: Union[bytes, str]) -> int:
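
With the silent fallback gone, an id that is missing from the decoder now raises ValueError instead of quietly decoding to an empty string. A tiny sketch of the stricter behaviour, using a stand-in decoder dict rather than the tokenizer's real vocabulary:

decoder = {0: b"hello", 1: b"world"}  # stand-in for the tokenizer's id -> token map

def convert_id_to_token(index):
    # Mirrors the stricter lookup: known ids decode, unknown ids raise.
    if index in decoder:
        return decoder[index]
    raise ValueError("unknown ids")

print(convert_id_to_token(1))   # b'world'
# convert_id_to_token(10**9)    # now raises ValueError instead of returning ""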
 