# NOTE(review): stray optimizer construction left over from a paste — `model`
# is undefined at module scope here, so this statement raised NameError on
# import. Commented out; construct the optimizer AFTER instantiating the model:
#   optimizer = AdamW(model.parameters(), lr=5e-5)


class BERTTextToRating(PreTrainedModel):
    """DistilBERT encoder with a 5-way linear head (text -> rating logits)."""

    config_class = BERTTextToRatingConfig

    def __init__(self, config):
        super().__init__(config)
        # Load a fine-tuned MLM checkpoint and keep only its DistilBERT
        # encoder (the MLM head is discarded).
        checkpoint = "medhabi/distilbert-base-uncased-mlm-ta-local"
        mlm_model = AutoModelForMaskedLM.from_pretrained(checkpoint)
        self.bert_model = mlm_model.distilbert
        self.dropout = torch.nn.Dropout(0.3)
        # 768 = DistilBERT hidden size; 5 = number of rating classes.
        self.linear = torch.nn.Linear(768, 5)

    def forward(self, input_ids, attention_mask, token_type_ids=None):
        """Return rating logits of shape (batch, 5).

        Args:
            input_ids: token id tensor, (batch, seq_len).
            attention_mask: padding mask, (batch, seq_len).
            token_type_ids: accepted for API compatibility but ignored —
                DistilBERT has no token-type embeddings. Now optional
                (was a required, unused positional argument).
        """
        output = self.bert_model(input_ids, attention_mask=attention_mask)
        # BUG FIX: the original used last_hidden_state[0][0].reshape(1, -1),
        # i.e. the [CLS] vector of only the FIRST example, silently dropping
        # every other item in the batch. Take the [CLS] token across the whole
        # batch instead; for batch size 1 the result is identical (1, 768).
        cls_repr = output.last_hidden_state[:, 0, :]
        return self.linear(self.dropout(cls_repr))