import pytorch_lightning as pl
import torch
from torch.optim import AdamW
from transformers import AutoModelForSeq2SeqLM, get_linear_schedule_with_warmup


class T5(pl.LightningModule):
    def __init__(self, lr=5e-5, num_train_epochs=15, warmup_steps=1000):
        super().__init__()
        self.model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")
        self.train_losses = []
        self.validation_losses = []
        self.train_losses_epoch = []
        self.validation_losses_epoch = []
        self.save_hyperparameters()

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        return outputs

    def common_step(self, batch, batch_idx):
        outputs = self(**batch)
        loss = outputs.loss
        return loss

    def training_step(self, batch, batch_idx):
        loss = self.common_step(batch, batch_idx)
        # Log the loss for each training step and the average across the epoch
        self.log("training_loss", loss, on_step=True, on_epoch=True)
        self.train_losses.append(loss)
        return loss

    def validation_step(self, batch, batch_idx):
        loss = self.common_step(batch, batch_idx)
        self.log("validation_loss", loss, on_epoch=True)
        self.validation_losses.append(loss)
        return loss

    def on_train_epoch_end(self):
        # Calculate the average loss for the epoch and append it to the list
        avg_train_loss = sum(self.train_losses) / len(self.train_losses)
        self.train_losses_epoch.append(avg_train_loss.item())
        # Reset the epoch loss accumulator
        self.train_losses = []

    def on_validation_epoch_end(self):
        # Calculate the average loss for the epoch and append it to the list
        avg_val_loss = sum(self.validation_losses) / len(self.validation_losses)
        self.validation_losses_epoch.append(avg_val_loss.item())
        # Reset the epoch loss accumulator
        self.validation_losses = []

    def configure_optimizers(self):
        # Create the optimizer
        optimizer = AdamW(self.model.parameters(), lr=self.hparams.lr)
        # Create the learning rate scheduler (linear warmup then linear decay)
        num_train_optimization_steps = self.hparams.num_train_epochs * len(self.train_dataloader())
        lr_scheduler = {
            "scheduler": get_linear_schedule_with_warmup(
                optimizer,
                num_warmup_steps=self.hparams.warmup_steps,
                num_training_steps=num_train_optimization_steps,
            ),
            "name": "learning_rate",
            "interval": "step",
            "frequency": 1,
        }
        return {"optimizer": optimizer, "lr_scheduler": lr_scheduler}

    def generate(self, input_ids, max_new_tokens=30,
                 device=torch.device("cuda" if torch.cuda.is_available() else "cpu")):
        # Reshape a single example to batch size 1 and generate on the chosen device
        input_ids = input_ids.clone().detach().reshape((1, -1)).to(device)
        return self.model.generate(input_ids, max_new_tokens=max_new_tokens)

    def push_to_hub(self, model_name, organization):
        # Push the underlying Transformers model to the Hugging Face Hub
        self.model.push_to_hub(f"{organization}/{model_name}")

    def from_pretrained(self, model_path):
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_path)

    def train_dataloader(self):
        return train_dataloader

    def val_dataloader(self):
        return valid_dataloader

    def test_dataloader(self):
        return test_dataloader
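
As a minimal sketch of how this module could be trained (an assumption here: train_dataloader, valid_dataloader and test_dataloader are already defined at module level, since the dataloader hooks above return them directly):

# Hypothetical usage sketch, not part of the module above.
model = T5(lr=5e-5, num_train_epochs=15, warmup_steps=1000)

trainer = pl.Trainer(
    max_epochs=model.hparams.num_train_epochs,
    accelerator="auto",  # use a GPU if one is available
    devices=1,
)
# The LightningModule supplies its own dataloaders via the hooks above,
# so no dataloaders need to be passed to fit().
trainer.fit(model)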