CustomLEDForResultsIdOnSeperatedSet / modeling_CustomLEDForResultsId.py
ekolasky's picture
Update modeling_CustomLEDForResultsId.py
f5b20aa
# from transformers.models.led.modeling_led import LEDEncoder
from transformers import LEDConfig, LEDModel, LEDPreTrainedModel
from transformers.modeling_outputs import TokenClassifierOutput
import torch.nn as nn
class CustomLEDForResultsIdModel(LEDPreTrainedModel):
    """LED encoder followed by dropout and a linear classification head.

    The linear head is applied to the encoder's ``last_hidden_state``, so it
    yields ``config.num_labels`` scores for each encoder output position.
    """

    def __init__(self, config: LEDConfig, checkpoint=None):
        """Build the encoder body and the classification head.

        Args:
            config: LED configuration. ``num_labels`` and ``dropout`` are read
                from it directly; the head's input width is the ``d_model``
                of the resulting encoder's config.
            checkpoint: Optional value forwarded to
                ``LEDModel.from_pretrained``. When falsy, the LED model is
                constructed from ``config`` alone.
        """
        super().__init__(config)
        self.num_labels = config.num_labels

        # Echo the settings that size the head; this goes to stdout.
        print("Configs", config.num_labels, config.dropout, sep="\n")

        # Only the encoder half of the LED model is kept as the body.
        base_model = (
            LEDModel.from_pretrained(checkpoint, config=config)
            if checkpoint
            else LEDModel(config)
        )
        self.led = base_model.get_encoder()

        self.dropout = nn.Dropout(config.dropout)
        hidden_size = self.led.config.d_model
        self.classifier = nn.Linear(hidden_size, self.num_labels)

    def forward(self, input_ids=None, attention_mask=None, labels=None, global_attention_mask=None, return_loss=True):
        """Run the encoder and classify every output position.

        Args:
            input_ids: Forwarded unchanged to the encoder.
            attention_mask: Forwarded unchanged to the encoder.
            labels: Optional targets. When given, they are flattened and
                compared against the flattened logits with cross-entropy.
                NOTE(review): presumably one class index per position
                (batch, seq) -- confirm against the data collator.
            global_attention_mask: Forwarded unchanged to the encoder.
            return_loss: Not read inside this method.
                NOTE(review): possibly kept so training tooling can detect
                that the model returns a loss -- confirm before removing.

        Returns:
            A dict with ``'loss'`` (``None`` when ``labels`` is ``None``) and
            ``'logits'`` (the classifier output).
        """
        encoded = self.led(
            input_ids=input_ids,
            attention_mask=attention_mask,
            global_attention_mask=global_attention_mask,
        )
        logits = self.classifier(self.dropout(encoded.last_hidden_state))

        result = {'loss': None, 'logits': logits}
        if labels is not None:
            # Collapse every leading dimension so each row is one prediction.
            flat_logits = logits.view(-1, self.num_labels)
            flat_labels = labels.view(-1)
            result['loss'] = nn.CrossEntropyLoss()(flat_logits, flat_labels)
        return result