puffy310 committed
Commit 9998550
Parent(s): 0368120

Update modeling_deepseek.py

Files changed (1)
  1. modeling_deepseek.py +0 -7
modeling_deepseek.py CHANGED
@@ -48,8 +48,6 @@ from transformers.pytorch_utils import (
 from transformers.utils import (
     add_start_docstrings,
     add_start_docstrings_to_model_forward,
-    is_flash_attn_2_available,
-    is_flash_attn_greater_or_equal_2_10,
     logging,
     replace_return_docstrings,
 )
@@ -58,11 +56,6 @@ from .configuration_deepseek import DeepseekV2Config
 import torch.distributed as dist
 import numpy as np
 
-if is_flash_attn_2_available():
-    from flash_attn import flash_attn_func, flash_attn_varlen_func
-    from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input  # noqa
-
-
 # This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.
 # It means that the function will not be traced through and simply appear as a node in the graph.
 if is_torch_fx_available():
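
For context, the lines removed above are the optional-import guard Transformers modeling files commonly use for FlashAttention-2: the CUDA kernels from the `flash_attn` package are imported only when `is_flash_attn_2_available()` reports the package is installed, so the module can still be loaded on machines without it. A minimal, self-contained sketch of that pattern is below; the `None` fallback branch is illustrative only and is not part of the original file.

# Sketch of the guarded flash-attn import removed by this commit.
# `is_flash_attn_2_available` is the transformers.utils helper used above.
from transformers.utils import is_flash_attn_2_available

if is_flash_attn_2_available():
    # FlashAttention-2 kernels (optional dependency, only present if flash-attn is installed).
    from flash_attn import flash_attn_func, flash_attn_varlen_func
    from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input  # noqa
else:
    # Illustrative fallback: leave the symbols defined but unusable so the
    # module imports cleanly when flash-attn is absent.
    flash_attn_func = flash_attn_varlen_func = None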
 