import torch

from transformers import BertPreTrainedModel, BertModel, BertConfig

from .reward_model_config import RewardModelConfig


class RewardModel(BertPreTrainedModel):
""" |
|
RewardModel class for PyTorch |
|
|
|
Args: |
|
config (transformers.configuration): model configuration |
|
|
|
Returns: |
|
output (torch.tensor): tensor containing the output logits [-1,1] |
|
""" |
|
config_class = RewardModelConfig |
|
|
|
    def __init__(self, config):
        super().__init__(config)
        self.bert = BertModel(config)

        # Feed-forward head applied to the [CLS] representation:
        # hidden_size -> linear_layer -> linear_layer -> linear_layer_output
        self.cls_layer1 = torch.nn.Linear(config.hidden_size, config.linear_layer)
        self.relu1 = torch.nn.ReLU()
        self.ff1 = torch.nn.Linear(config.linear_layer, config.linear_layer)
        self.tanh1 = torch.nn.Tanh()
        self.ff2 = torch.nn.Linear(config.linear_layer, config.linear_layer_output)

    def forward(self, input_ids, attention_mask, alpha=1):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)

        # Use the final hidden state of the [CLS] token as the sequence representation.
        logits = outputs.last_hidden_state[:, 0, :]

        # Feed-forward head producing the reward logits.
        output = self.cls_layer1(logits)
        output = self.relu1(output)
        output = self.ff1(output)
        output = self.tanh1(output)
        output = self.ff2(output)

        # At inference time, scale the logits by alpha (no effect during training).
        if not self.training:
            output = torch.mul(output, alpha)

        return output
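

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only). It assumes RewardModelConfig forwards
# BertConfig-style keyword arguments (hidden_size, vocab_size, ...) and exposes
# the `linear_layer` / `linear_layer_output` fields used by the head above;
# the actual constructor signature lives in .reward_model_config and may differ.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    config = RewardModelConfig(
        hidden_size=768,
        linear_layer=256,        # assumed: hidden width of the reward head
        linear_layer_output=1,   # assumed: one reward score per sequence
    )
    model = RewardModel(config)
    model.eval()

    # Dummy batch: 2 sequences of length 8, token ids drawn from BERT's vocab.
    input_ids = torch.randint(0, config.vocab_size, (2, 8))
    attention_mask = torch.ones_like(input_ids)

    with torch.no_grad():
        scores = model(input_ids, attention_mask, alpha=1)
    print(scores.shape)  # expected: torch.Size([2, 1])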