Handle model parallelism
Browse files
With this added line (similar to many models in Transformers), this model will work with `device_map="auto"` during training.
- modeling_codet5p.py +1 -0
modeling_codet5p.py
CHANGED
@@ -927,6 +927,7 @@ class CodeT5pEncoderDecoderModel(PreTrainedModel):
|
|
927 |
loss = None
|
928 |
if labels is not None:
|
929 |
# warnings.warn(DEPRECATION_WARNING, FutureWarning)
|
|
|
930 |
logits = decoder_outputs.logits if return_dict else decoder_outputs[0]
|
931 |
loss_fct = CrossEntropyLoss()
|
932 |
loss = loss_fct(logits.reshape(-1, self.decoder.config.vocab_size), labels.view(-1))
|
|
|
927 |
loss = None
|
928 |
if labels is not None:
|
929 |
# warnings.warn(DEPRECATION_WARNING, FutureWarning)
|
930 |
+
labels = labels.to(logits.device)
|
931 |
logits = decoder_outputs.logits if return_dict else decoder_outputs[0]
|
932 |
loss_fct = CrossEntropyLoss()
|
933 |
loss = loss_fct(logits.reshape(-1, self.decoder.config.vocab_size), labels.view(-1))
|