YAML Metadata Warning:empty or missing yaml metadata in repo card
Check out the documentation for more information.
StyleRM v2
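This model is a dual-head reward model trained for literary prose. It has a style head that ranks prose quality and a faithfulness head that acts as a veto: the final reward is `style - veto_beta * softplus(veto_tau - faith)`, so a low faithfulness score pulls the reward down regardless of style.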
Architecture
- Backbone: answerdotai/ModernBERT-large
- Objective: style_primary_faith_veto_flat_copy_length_gm_v3
Inference Example
To use this model, define the `RewardModel` class exactly as shown below (the checkpoint's parameter names must match its layout) and load the provided weights from `model.pt`.
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer
class RewardModel(nn.Module):
    """Dual-head reward model: a style score penalised by a faithfulness veto.

    A shared encoder backbone produces token representations that are
    mean-pooled over non-padding positions. Two identically shaped heads map
    the pooled vector to scalars, ``style`` and ``faith``, which are combined
    into the final reward::

        reward = style - veto_beta * softplus(veto_tau - faith)

    The softplus term grows as ``faith`` drops below ``veto_tau``, so a low
    faithfulness score reduces the reward regardless of the style score.

    Args:
        model_id: Identifier passed to ``AutoModel.from_pretrained`` to load
            the backbone.
        veto_beta: Multiplier applied to the faithfulness penalty.
        veto_tau: Threshold the faithfulness score is compared against.
    """

    def __init__(self, model_id, veto_beta=2.0, veto_tau=0.0):
        super().__init__()
        self.backbone = AutoModel.from_pretrained(model_id)
        hidden = self.backbone.config.hidden_size
        # Creation order (style, then faith) and the Sequential layout are
        # kept as-is so parameter names line up with saved state dicts.
        self.style_head = self._make_head(hidden)
        self.faith_head = self._make_head(hidden)
        self.veto_beta, self.veto_tau = veto_beta, veto_tau

    @staticmethod
    def _make_head(hidden):
        """Build one scalar head: LayerNorm -> Linear -> GELU -> Linear(1)."""
        return nn.Sequential(
            nn.LayerNorm(hidden),
            nn.Linear(hidden, hidden // 4),
            nn.GELU(),
            nn.Linear(hidden // 4, 1),
        )

    def forward(self, enc):
        """Score a batch of encoded inputs.

        Args:
            enc: Mapping of backbone keyword arguments. It is unpacked into
                the backbone call and must contain ``'attention_mask'``
                (presumably shaped ``(batch, seq)`` -- confirm against the
                tokenizer output).

        Returns:
            Tensor with one reward per batch row.
        """
        hidden = self.backbone(**enc).last_hidden_state
        # Masked mean over the sequence axis; the clamp guards against a row
        # whose mask is entirely zero.
        mask = enc['attention_mask'].unsqueeze(-1).float()
        pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-6)
        # The float mask promotes a half-precision ``hidden`` to float32.
        # Cast back to the heads' parameter dtype so a model converted with
        # ``.half()`` / ``.to(torch.bfloat16)`` does not fail on a dtype
        # mismatch. No-op for the default float32 model. Assumes both heads
        # share one dtype.
        head_dtype = next(self.style_head.parameters()).dtype
        pooled = pooled.to(head_dtype)
        style = self.style_head(pooled).squeeze(-1)
        faith = self.faith_head(pooled).squeeze(-1)
        penalty = F.softplus(self.veto_tau - faith)
        return style - self.veto_beta * penalty
# --- Step 1: tokenizer, model skeleton, and trained weights ---------------
repo_id = "3rd-Degree-Burn/stylerm-v2"
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = RewardModel("answerdotai/ModernBERT-large")

# model.pt is read from the current working directory.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# source you trust.
checkpoint = torch.load("model.pt", map_location="cpu")
model.load_state_dict(checkpoint['state_dict'])
model.eval()

# --- Step 2: score each candidate rewrite against the source sentence -----
source = "The storm came quickly and covered the whole valley in clouds."
candidates = [
    "The storm arrived and there were clouds everywhere in the valley.",
    "The storm broke with a sudden, violet urgency, swaddling the valley in a thick, suffocating wool of grey.",
]

# Pair the same source with every candidate so each row is (source, candidate).
paired_sources = [source] * len(candidates)
with torch.no_grad():
    inputs = tokenizer(
        paired_sources,
        candidates,
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    # Drop token_type_ids if the tokenizer produced them, before the encoding
    # is unpacked into the backbone call.
    inputs.pop("token_type_ids", None)
    scores = model(inputs)

for i, score in enumerate(scores):
    print(f"Candidate {i} Reward: {score.item():.4f}")
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support