Update modeling_llama.py
modeling_llama.py (+8 -8)
```diff
@@ -646,14 +646,14 @@ class LlamaModel(LlamaPreTrainedModel):
         if inputs_embeds is None:
             inputs_embeds = self.embed_tokens(input_ids)
         # embed positions
-
-
-
-
-
-
-
-        attention_mask = None
+        if attention_mask is None:
+            attention_mask = torch.ones(
+                (batch_size, seq_length_with_past), dtype=torch.bool, device=inputs_embeds.device
+            )
+        attention_mask = self._prepare_decoder_attention_mask(
+            attention_mask, (batch_size, seq_length), inputs_embeds, past_key_values_length
+        )
+        # attention_mask = None


         hidden_states = inputs_embeds
```
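This commit re-enables the attention-mask construction that had been stubbed out with a hard-coded `attention_mask = None`, so padding and causal masking are applied to the decoder again. For context, here is a minimal standalone sketch of what a `_prepare_decoder_attention_mask`-style helper produces in the transformers LLaMA code of this era: a `[batch, 1, tgt_len, src_len]` additive float mask combining a causal mask with the user-supplied 2D padding mask. The function name and exact details below are an approximation for illustration, not the library's implementation:

```python
import torch

def make_decoder_attention_mask(attention_mask, input_shape, inputs_embeds, past_key_values_length):
    # Hypothetical sketch: build the additive mask that gets added to the
    # attention scores before softmax. Masked positions get the dtype's
    # minimum value so they vanish after softmax.
    batch_size, tgt_len = input_shape
    dtype = inputs_embeds.dtype
    device = inputs_embeds.device
    src_len = tgt_len + past_key_values_length
    min_value = torch.finfo(dtype).min

    # Causal part: a query at position i (offset by the KV-cache length) may
    # only attend to keys at absolute positions <= i + past_key_values_length.
    causal = torch.full((tgt_len, src_len), min_value, dtype=dtype, device=device)
    causal = causal.triu(diagonal=1 + past_key_values_length)
    mask = causal[None, None, :, :].expand(batch_size, 1, tgt_len, src_len).clone()

    # Padding part: key positions where attention_mask == 0 are masked out
    # for every query (masked_fill avoids adding min_value twice).
    if attention_mask is not None:
        keep = attention_mask[:, None, None, :].to(torch.bool)
        mask = mask.masked_fill(~keep, min_value)
    return mask

# Example: batch of 2, 4 new tokens, no KV cache.
emb = torch.zeros(2, 4, 8)
pad_mask = torch.ones(2, 4, dtype=torch.bool)
m = make_decoder_attention_mask(pad_mask, (2, 4), emb, past_key_values_length=0)
print(m.shape)  # torch.Size([2, 1, 4, 4])
```

Returning an additive mask of shape `(batch, 1, tgt_len, src_len)` lets it broadcast across attention heads and be summed directly onto the score matrix, which is why skipping this step (the old `attention_mask = None`) silently disables both causal and padding masking.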