Upload folder using huggingface_hub
- modeling_qwen.py +2 -9
- tokenization_qwen.py +0 -2
modeling_qwen.py
CHANGED
@@ -37,7 +37,7 @@ from torch import nn
 SUPPORT_CUDA = torch.cuda.is_available()
 SUPPORT_BF16 = SUPPORT_CUDA and torch.cuda.is_bf16_supported()
 SUPPORT_FP16 = SUPPORT_CUDA and torch.cuda.get_device_capability(0)[0] >= 7
-SUPPORT_TORCH2 =
+SUPPORT_TORCH2 = hasattr(torch, '__version__') and int(torch.__version__.split(".")[0]) >= 2


 from .configuration_qwen import QWenConfig
@@ -414,7 +414,6 @@ class QWenAttention(nn.Module):
         use_cache: Optional[bool] = False,
     ):
         mixed_x_layer = self.c_attn(hidden_states)
-        #print("mixed out: ", mixed_x_layer)

         query, key, value = mixed_x_layer.split(self.split_size, dim=2)

@@ -444,7 +443,6 @@ class QWenAttention(nn.Module):
             key_list += [apply_rotary_pos_emb(key[i:i+1, :, :], k_pos_emb)]
         query = torch.cat(query_list, dim=0)
         key = torch.cat(key_list, dim=0)
-        #print("query: ", query, "key:", key)

         if self.use_cache_quantization:
             key = quantize_cache_v(key.permute(0, 2, 1, 3),
@@ -474,7 +472,7 @@ class QWenAttention(nn.Module):
             # present=(key,value)
             key = torch.cat((past_key, key), dim=1)
             value = torch.cat((past_value, value), dim=1)
-
+
         if use_cache:
             present = (key, value)
         else:
@@ -540,10 +538,8 @@ class QWenAttention(nn.Module):
         context_layer = self._merge_heads(
             attn_output, self.num_heads, self.head_dim
         )
-        #print("context: ", context_layer)

         attn_output = self.c_proj(context_layer)
-        #print("attn: ", attn_output)

         outputs = (attn_output, present)
         if output_attentions:
@@ -622,7 +618,6 @@ class QWenBlock(nn.Module):
             use_cache=use_cache,
             output_attentions=output_attentions,
         )
-        #print("attn output: ", attn_outputs[0])
         attn_output = attn_outputs[0]

         outputs = attn_outputs[1:]
@@ -634,7 +629,6 @@ class QWenBlock(nn.Module):

         residual = layernorm_input
         mlp_output = self.mlp(layernorm_output)
-        #print("mlp output: ", mlp_output)
         hidden_states = residual + mlp_output

         if use_cache:
@@ -909,7 +903,6 @@ class QWenModel(QWenPreTrainedModel):
             )

             hidden_states = outputs[0]
-            #print(i, hidden_states)
             if use_cache is True:
                 presents = presents + (outputs[1],)
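The substantive change in this file is the completed SUPPORT_TORCH2 flag; the remaining deletions only strip leftover debug print statements. Below is a minimal sketch, assuming nothing beyond an importable torch, of how the major-version check behaves; it also handles local build strings such as "2.1.0+cu121", because only the first dot-separated component is parsed:

import torch

# "2.1.0+cu121".split(".") -> ["2", "1", "0+cu121"]; element [0] is the major version.
SUPPORT_TORCH2 = hasattr(torch, "__version__") and int(torch.__version__.split(".")[0]) >= 2

print(torch.__version__, "-> torch 2.x detected:", SUPPORT_TORCH2)

A flag like this typically gates PyTorch-2-only code paths such as torch.nn.functional.scaled_dot_product_attention; note that the removed line, as shown in the diff, was an incomplete assignment and would have raised a SyntaxError at import time.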
tokenization_qwen.py
CHANGED
@@ -243,8 +243,6 @@ class QWenTokenizer(PreTrainedTokenizer):
         """Converts an id to a token, special tokens included"""
         if index in self.decoder:
             return self.decoder[index]
-        print("error index", index)
-        return ""
         raise ValueError("unknown ids")

     def _convert_token_to_id(self, token: Union[bytes, str]) -> int:
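This is a behavioral fix, not just cleanup: before the change, an unknown id printed "error index" and returned an empty string, so the raise below it was unreachable and bad ids were silently decoded to "". A self-contained sketch of the post-commit behavior follows; the demo class, its decoder contents, and the method signature are illustrative assumptions, while the method body comes from the diff:

from typing import Union

class DecoderDemo:
    def __init__(self):
        # Stand-in for the tokenizer's id -> token table.
        self.decoder = {0: b"hello"}

    def _convert_id_to_token(self, index: int) -> Union[bytes, str]:
        """Converts an id to a token, special tokens included"""
        if index in self.decoder:
            return self.decoder[index]
        # The old print(...) / return "" fallback made this raise unreachable;
        # unknown ids now surface as an exception callers can handle.
        raise ValueError("unknown ids")

demo = DecoderDemo()
print(demo._convert_id_to_token(0))  # b'hello'
# demo._convert_id_to_token(99)      # raises ValueError("unknown ids")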