"""
Model definition and utilities
"""

from transformers import AutoModelForSequenceClassification, AutoConfig
from typing import Dict, Optional
import logging
import torch
import torch.nn as nn


def create_model(
    model_name: str,
    num_labels: int,
    label2id: Dict[str, int],
    id2label: Dict[int, str],
    dropout: Optional[float] = None
):
    """
    Create a sequence classification model with optional dropout configuration.

    Args:
        model_name: Name of the pretrained model
        num_labels: Number of classification labels
        label2id: Mapping from label names to IDs
        id2label: Mapping from IDs to label names
        dropout: Optional dropout probability applied to the hidden, attention,
            and classifier dropout settings when the config exposes them

    Returns:
        Initialized model
    """
    config = AutoConfig.from_pretrained(
        model_name,
        num_labels=num_labels,
        label2id=label2id,
        id2label=id2label
    )

    # Not all architectures expose the same dropout attributes, so only
    # override the ones present on this config.
    if dropout is not None:
        if hasattr(config, 'hidden_dropout_prob'):
            config.hidden_dropout_prob = dropout
        if hasattr(config, 'attention_probs_dropout_prob'):
            config.attention_probs_dropout_prob = dropout
        if hasattr(config, 'classifier_dropout'):
            config.classifier_dropout = dropout
        logging.info(f"Set model dropout to {dropout}")

    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        config=config
    )

    return model
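
# Example usage (a minimal sketch; the checkpoint name and label set below are
# placeholders, not values defined by this module):
#
#     label2id = {"negative": 0, "positive": 1}
#     id2label = {i: label for label, i in label2id.items()}
#     model = create_model(
#         "bert-base-uncased",
#         num_labels=len(label2id),
#         label2id=label2id,
#         id2label=id2label,
#         dropout=0.2,
#     )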


def apply_class_weights(
    model: nn.Module,
    class_weights: Optional[list] = None
) -> Optional[torch.Tensor]:
    """
    Build a class-weight tensor for use in a weighted loss function.

    Note that this does not modify the model in place; the returned tensor is
    meant to be passed to a weighted loss (e.g. nn.CrossEntropyLoss(weight=...))
    by the training code.

    Args:
        model: The model the weights are intended for (currently unused)
        class_weights: List of weights for each class (must match num_labels)

    Returns:
        Tensor of class weights, or None if no weights were provided
    """
    if class_weights is not None:
        weights_tensor = torch.tensor(class_weights, dtype=torch.float32)
        logging.info(f"Using class weights: {class_weights}")
        return weights_tensor
    return None
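
# Example usage (a minimal sketch): the returned tensor is meant to feed a
# weighted cross-entropy loss in the training loop, typically via a custom
# compute_loss on a Trainer subclass. "WeightedTrainer" is a hypothetical
# name, and the exact compute_loss signature varies across transformers
# versions:
#
#     from transformers import Trainer
#
#     class WeightedTrainer(Trainer):
#         def __init__(self, *args, class_weights=None, **kwargs):
#             super().__init__(*args, **kwargs)
#             self.class_weights = class_weights
#
#         def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
#             labels = inputs.pop("labels")
#             outputs = model(**inputs)
#             logits = outputs.logits
#             weight = (
#                 self.class_weights.to(logits.device)
#                 if self.class_weights is not None
#                 else None
#             )
#             loss_fct = nn.CrossEntropyLoss(weight=weight)
#             loss = loss_fct(logits.view(-1, logits.size(-1)), labels.view(-1))
#             return (loss, outputs) if return_outputs else loss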


def get_model_size(model: nn.Module) -> float:
    """
    Calculate model size in millions of parameters.

    Args:
        model: PyTorch model

    Returns:
        Number of parameters in millions
    """
    param_size = sum(p.numel() for p in model.parameters())
    return param_size / 1e6
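
# Example (sketch), given a model returned by create_model above:
#
#     logging.info(f"Model size: {get_model_size(model):.1f}M parameters")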


def get_trainable_params(model: nn.Module) -> Dict[str, int]:
    """
    Get counts of trainable and non-trainable parameters.

    Args:
        model: PyTorch model

    Returns:
        Dictionary with 'trainable', 'total', and 'non_trainable' parameter counts
    """
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    return {
        'trainable': trainable,
        'total': total,
        'non_trainable': total - trainable
    }
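
# Example (sketch): freezing the encoder shows up in these counts. base_model
# is the standard transformers accessor for the encoder; for a plain nn.Module,
# iterate over the relevant submodule instead.
#
#     for param in model.base_model.parameters():
#         param.requires_grad = False
#     logging.info(f"Parameter counts after freezing: {get_trainable_params(model)}")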