# File size: 2,208 Bytes (commit b81f538)
# models/roberta_model.py
import torch
import torch.nn as nn
from transformers import RobertaModel
from config import DROPOUT_RATE, ROBERTA_MODEL_NAME # Import ROBERTA_MODEL_NAME
class RobertaMultiOutputModel(nn.Module):
    """RoBERTa-based model for multi-output classification.

    A pre-trained RoBERTa encoder (loaded from ``ROBERTA_MODEL_NAME``) is used
    as a shared backbone. One independent linear head per label column maps
    the encoder's pooled output to that column's class logits.
    """

    # Statically set tokenizer name for easy access in main.py; as a class
    # attribute it can be read without instantiating (and loading) the model.
    tokenizer_name = ROBERTA_MODEL_NAME

    def __init__(self, num_labels) -> None:
        """Build the RoBERTa backbone, the dropout layer and the heads.

        Args:
            num_labels (list): A list where each element is the number of
                classes for a corresponding label column. One classification
                head is created per element, in the same order.
        """
        super().__init__()

        # Load the pre-trained RoBERTa backbone.
        # Per the Hugging Face docs, `pooler_output` is NOT the raw hidden
        # state of the first token (<s>): it is that hidden state after the
        # pooler's linear layer and tanh activation.
        # NOTE(review): some RoBERTa checkpoints reportedly ship without
        # pooler weights, in which case they are freshly initialised on load
        # and only learned during fine-tuning -- confirm for
        # ROBERTA_MODEL_NAME.
        self.roberta = RobertaModel.from_pretrained(ROBERTA_MODEL_NAME)

        # Dropout applied to the pooled representation before the heads.
        self.dropout = nn.Dropout(DROPOUT_RATE)

        # One linear head per label column; every head reads the same pooled
        # vector, whose width is the backbone's hidden size.
        hidden_size = self.roberta.config.hidden_size
        self.classifiers = nn.ModuleList(
            [nn.Linear(hidden_size, n_classes) for n_classes in num_labels]
        )

    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> list:
        """Run the backbone once and apply every classification head.

        Args:
            input_ids (torch.Tensor): Tensor of token IDs.
            attention_mask (torch.Tensor): Tensor indicating attention
                (presumably 1 for real tokens and 0 for padding -- confirm
                against the tokenizer call in the data pipeline).

        Returns:
            list: A list of logit tensors, one for each classification head,
            in the same order as ``num_labels`` was given to ``__init__``.
            The last dimension of each tensor is that head's class count.
        """
        # Single encoder pass shared by all heads.
        encoder_outputs = self.roberta(
            input_ids=input_ids,
            attention_mask=attention_mask,
        )

        # Regularise the pooled representation before classification.
        pooled_output = self.dropout(encoder_outputs.pooler_output)

        # Each head produces its own logits from the same pooled vector.
        return [classifier(pooled_output) for classifier in self.classifiers]