StyleRM v2

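StyleRM v2 is a dual-head reward model trained for literary prose. A style head scores prose quality for ranking, and a faithfulness head acts as a veto: the final reward is the style score minus a softplus penalty that grows as the faithfulness score drops below a threshold.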

Architecture

Backbone: answerdotai/ModernBERT-large
Objective: style_primary_faith_veto_flat_copy_length_gm_v3

Inference Example

To use this model, first define the RewardModel class shown below, then load the provided weights (model.pt) into it.

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

class RewardModel(nn.Module):
    """Dual-head reward model: a style score reduced by a faithfulness veto.

    A shared backbone encodes the input; its token states are mean-pooled
    under the attention mask and fed to two small MLP heads. The returned
    reward is ``style - veto_beta * softplus(veto_tau - faith)``, so a
    faithfulness score well above ``veto_tau`` leaves the style score almost
    untouched while a low one subtracts a growing penalty.

    Args:
        model_id: Identifier passed to ``AutoModel.from_pretrained`` to build
            the backbone.
        veto_beta: Multiplier applied to the faithfulness penalty.
        veto_tau: Faithfulness threshold used inside the softplus.
    """

    def __init__(self, model_id, veto_beta=2.0, veto_tau=0.0):
        super().__init__()
        self.backbone = AutoModel.from_pretrained(model_id)
        width = self.backbone.config.hidden_size
        # Style head is built before the faithfulness head so parameter
        # creation order (and thus random initialisation) stays the same.
        self.style_head = self._make_head(width)
        self.faith_head = self._make_head(width)
        self.veto_beta = veto_beta
        self.veto_tau = veto_tau

    @staticmethod
    def _make_head(width):
        """Build one scalar head: LayerNorm -> Linear -> GELU -> Linear(1)."""
        bottleneck = width // 4
        return nn.Sequential(
            nn.LayerNorm(width),
            nn.Linear(width, bottleneck),
            nn.GELU(),
            nn.Linear(bottleneck, 1),
        )

    def forward(self, enc):
        """Return one reward per input row.

        Args:
            enc: Mapping of backbone inputs; it is unpacked into the backbone
                call and must contain an ``attention_mask`` entry.

        Returns:
            Tensor of rewards with the trailing singleton head dimension
            squeezed away.
        """
        token_states = self.backbone(**enc).last_hidden_state

        # Masked mean over dim 1 (presumably the token axis — confirm against
        # the tokenizer output). Positions with mask 0 contribute nothing.
        weights = enc["attention_mask"].unsqueeze(-1).float()
        # The clamp avoids a division by zero when a row is fully masked.
        token_count = weights.sum(dim=1).clamp(min=1e-6)
        sentence_vec = (token_states * weights).sum(dim=1) / token_count

        style_score = self.style_head(sentence_vec).squeeze(-1)
        faith_score = self.faith_head(sentence_vec).squeeze(-1)

        # Soft veto: the penalty rises smoothly as faith falls below veto_tau.
        veto_penalty = F.softplus(self.veto_tau - faith_score)
        return style_score - self.veto_beta * veto_penalty

# Step 1: build the tokenizer and model, then restore the trained weights.
repo_id = "3rd-Degree-Burn/stylerm-v2"
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = RewardModel("answerdotai/ModernBERT-large")

# "model.pt" is a relative path, so the checkpoint must already be in the
# current working directory; nothing in this script downloads it.
# NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
checkpoint = torch.load("model.pt", map_location="cpu")
state_dict = checkpoint["state_dict"]
model.load_state_dict(state_dict)
model.eval()  # inference mode for layers such as dropout

# Step 2: score each candidate rewrite against the same source sentence.
source = "The storm came quickly and covered the whole valley in clouds."
candidates = [
    "The storm arrived and there were clouds everywhere in the valley.",
    "The storm broke with a sudden, violet urgency, swaddling the valley in a thick, suffocating wool of grey.",
]
# The tokenizer pairs texts position by position, so repeat the source once
# per candidate.
paired_sources = [source] * len(candidates)

with torch.no_grad():
    inputs = tokenizer(
        paired_sources,
        candidates,
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    # Dropped when present; presumably the backbone's forward does not accept
    # this argument — confirm.
    inputs.pop("token_type_ids", None)
    scores = model(inputs)

for i, value in enumerate(scores.tolist()):
    print(f"Candidate {i} Reward: {value:.4f}")

Downloads last month: -; Downloads are not tracked for this model. How to track

Inference Providers NEW

This model isn't deployed by any Inference Provider. 🙋 Ask for provider support