Muennighoff committed
Commit 900f290 (1 parent: c50f64a)

Update modeling_gritlm7b.py

Files changed (1):
  1. modeling_gritlm7b.py +7 -7
modeling_gritlm7b.py CHANGED
@@ -30,12 +30,12 @@ import torch.utils.checkpoint
 from torch import nn
 from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
 
-from ...activations import ACT2FN
-from ...cache_utils import Cache, DynamicCache
-from ...modeling_attn_mask_utils import _prepare_4d_causal_attention_mask, _prepare_4d_causal_attention_mask_for_sdpa, _prepare_4d_attention_mask, _prepare_4d_attention_mask_for_sdpa
-from ...modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast
-from ...modeling_utils import PreTrainedModel
-from ...utils import (
+from transformers.activations import ACT2FN
+from transformers.cache_utils import Cache, DynamicCache
+from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_mask, _prepare_4d_causal_attention_mask_for_sdpa, _prepare_4d_attention_mask, _prepare_4d_attention_mask_for_sdpa
+from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast
+from transformers.modeling_utils import PreTrainedModel
+from transformers.utils import (
     add_start_docstrings,
     add_start_docstrings_to_model_forward,
     is_flash_attn_2_available,
@@ -43,7 +43,7 @@ from ...utils import (
     logging,
     replace_return_docstrings,
 )
-from .configuration_mistral import MistralConfig
+from transformers import MistralConfig
 
 
 try:
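
In short, the commit swaps the relative imports used inside the transformers source tree (`from ...activations import ...`) for absolute `transformers.*` imports. Relative imports only resolve when a module lives inside the transformers package itself; a custom modeling file downloaded from the Hub is loaded as a standalone module with no parent package, so it has to import from the installed library instead. A minimal sketch of how such a file is typically loaded, assuming it ships as the custom code of the GritLM/GritLM-7B repository (the repo id is an assumption based on the file name):

# Hedged sketch: load the repo's custom modeling_gritlm7b.py via remote code.
from transformers import AutoModel, AutoTokenizer

model_id = "GritLM/GritLM-7B"  # assumed Hub repo that ships this file
tokenizer = AutoTokenizer.from_pretrained(model_id)
# trust_remote_code=True makes transformers import the repo's own modeling
# file; with absolute imports it resolves against the installed transformers
# package instead of a nonexistent parent package, so loading no longer fails.
model = AutoModel.from_pretrained(model_id, trust_remote_code=True)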