# polemo_intensity/model_script.py
import os
import torch
import torch.nn as nn
from transformers import AutoModel, AutoConfig


class Model(nn.Module):
    def __init__(
        self,
        model_dir: str,
        metric_names: list[str],
        dropout: float = 0.2,
        hidden_dim: int | None = None
    ):
        """
        :param model_dir: path or HF identifier for the base Transformer
        :param metric_names: list of your emotion columns, e.g.
            ['Happiness_M', 'Sadness_M', …, 'Arousal_M']
        :param dropout: dropout rate applied after LayerNorm
        :param hidden_dim: if None, inferred from model.config.hidden_size
        """
        super().__init__()
        self.metric_names = metric_names
        self.bert = AutoModel.from_pretrained(model_dir)

        # infer hidden dim from the base model config if not provided
        if hidden_dim is None:
            hidden_dim = self.bert.config.hidden_size

        # per-metric heads: a hidden projection and a one-unit output layer per metric
        for name in self.metric_names:
            setattr(self, name, nn.Linear(hidden_dim, 1))
            setattr(self, 'l_1_' + name, nn.Linear(hidden_dim, hidden_dim))

        self.layer_norm = nn.LayerNorm(hidden_dim)
        self.relu = nn.ReLU()
        self.dropout_layer = nn.Dropout(dropout)
        self.sigmoid = nn.Sigmoid()
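
    # Construction sketch (hypothetical names): 'path/to/base-model' is a placeholder,
    # the metric names are the example columns from the docstring above.
    #   model = Model('path/to/base-model', metric_names=['Happiness_M', 'Sadness_M'])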

    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor):
        # RobertaModel returns (last_hidden_state, pooled_output) when return_dict=False
        _, pooled = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=False
        )
        return self.rate_embedding(pooled)

    def rate_embedding(self, x: torch.Tensor):
        # one prediction per metric, each squashed to (0, 1) by the sigmoid
        outputs = []
        for name in self.metric_names:
            h = getattr(self, 'l_1_' + name)(x)
            # residual + norm + dropout + activation
            h = self.relu(self.dropout_layer(self.layer_norm(h + x)))
            out = self.sigmoid(getattr(self, name)(h))
            outputs.append(out)
        return outputs

    def save_pretrained(self, save_directory: str):
        """
        Saves:
          - config.json (with the custom metric_names added)
          - the base model files (via HF save_pretrained)
          - pytorch_model.bin (full state_dict of this wrapper)
        """
        os.makedirs(save_directory, exist_ok=True)
        # extend the HF config with our metric_names so from_pretrained can recover them
        config = self.bert.config
        config.metric_names = self.metric_names
        config.save_pretrained(save_directory)
        self.bert.save_pretrained(save_directory)
        torch.save(self.state_dict(), os.path.join(save_directory, 'pytorch_model.bin'))
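
    # Round-trip sketch (hypothetical path): 'saved_model' is a placeholder directory.
    # After saving, the directory can be fed straight back into from_pretrained below,
    # which recovers metric_names from the config.json written here.
    #   model.save_pretrained('saved_model')
    #   reloaded = Model.from_pretrained('saved_model')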

    @classmethod
    def from_pretrained(
        cls,
        bert_model_dir: str,
        state_dict_path: str | None = None,
        dropout: float = 0.2,
        hidden_dim: int | None = None,
        metric_names: list[str] | None = None
    ):
        """
        Loads your production model in two pieces:
          1) the base Transformer from `bert_model_dir` (a model name or a directory with a config.json)
          2) your previously saved state dict from `state_dict_path` (a file path; the file need not have an extension)
        You must pass exactly the same metric_names you trained with,
        unless you previously called save_pretrained (which writes them into config.json).
        """
        # 1) try to read the config to recover metric_names & hidden_dim
        config = AutoConfig.from_pretrained(bert_model_dir)
        cfg_names = getattr(config, 'metric_names', None)
        cfg_hidden = getattr(config, 'hidden_size', None)
        _metric_names = metric_names or cfg_names
        _hidden_dim = hidden_dim or cfg_hidden
        if _metric_names is None or _hidden_dim is None:
            raise ValueError(
                "Must provide `metric_names` and `hidden_dim` "
                "either as arguments or stored in config.json."
            )

        # instantiate the wrapper around the base Transformer
        model = cls(
            model_dir=bert_model_dir,
            metric_names=_metric_names,
            dropout=dropout,
            hidden_dim=_hidden_dim
        )

        # 2) load the saved state dict
        if state_dict_path:
            if not os.path.isfile(state_dict_path):
                raise FileNotFoundError(f"State-dict file not found: {state_dict_path}")
            sd = torch.load(state_dict_path, map_location='cpu')
            model.load_state_dict(sd)
        else:
            # fallback: look for pytorch_model.bin in bert_model_dir
            fallback = os.path.join(bert_model_dir, 'pytorch_model.bin')
            if os.path.isfile(fallback):
                sd = torch.load(fallback, map_location='cpu')
                model.load_state_dict(sd)
            else:
                raise FileNotFoundError(
                    "No state dict provided and no pytorch_model.bin in `bert_model_dir`."
                )
        return model
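

# ---------------------------------------------------------------------------
# Minimal usage sketch. Everything below is illustrative: 'saved_model' is a
# placeholder for a directory produced by Model.save_pretrained(), and it is
# assumed to also contain the tokenizer files for the base Transformer
# (save them alongside the model with tokenizer.save_pretrained if needed).
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from transformers import AutoTokenizer

    model = Model.from_pretrained('saved_model')
    model.eval()
    tokenizer = AutoTokenizer.from_pretrained('saved_model')

    text = "An example sentence to score."
    enc = tokenizer(text, return_tensors='pt', truncation=True, padding=True)

    with torch.no_grad():
        scores = model(enc['input_ids'], enc['attention_mask'])

    # one sigmoid-scaled score per metric, in the order of model.metric_names
    for name, score in zip(model.metric_names, scores):
        print(f"{name}: {score.item():.3f}")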