UltraRM-13B-GPTQ / modeling_llama_rm.py
TheBloke's picture
GPTQ model commit
7f8b405
raw
history blame contribute delete
No virus
1.78 kB
from transformers import PreTrainedModel, LlamaConfig, LlamaModel
import torch.nn as nn
import torch
from typing import Optional, List
class LlamaRewardModel(PreTrainedModel):
config_class = LlamaConfig
def __init__(self, config):
super().__init__(config)
self.model = LlamaModel(config)
self.regression_head = nn.Linear(self.config.hidden_size, 1, bias=False)
def forward( # args are the same as LlamaForCausalLM
self,
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[List[torch.FloatTensor]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
labels: Optional[torch.LongTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
):
transformer_outputs = self.model.model(
input_ids,
attention_mask=attention_mask,
position_ids=position_ids,
past_key_values=past_key_values,
inputs_embeds=inputs_embeds,
)
hidden_states = transformer_outputs[0]
rewards = self.regression_head(hidden_states).squeeze(-1)
ends = attention_mask.cumsum(dim=1).argmax(dim=1).view(-1,1)
rewards = torch.gather(rewards, 1, ends)
return reward_models
model = LlamaRewardModel.from_pretrained("UltraRM-13b-32").half()
model.save_pretrained("UltraRM-13b")