"""Train a character-level MathTransformer on dataset.json.

Builds a character tokenizer over every input/output string in the
dataset, wraps the data in a DataLoader, and runs a simple
cross-entropy training loop on CPU for 10 epochs.
"""
import json

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from model_scratch import MathTransformer
from dataset_utils_scratch import MathDataset, CharTokenizer


def main() -> None:
    """Run the full pipeline: load data, build model, train, log loss."""
    # Load the raw records once, only to build the tokenizer vocabulary.
    # (MathDataset re-reads the file itself below.)
    with open("dataset.json", "r", encoding="utf-8") as f:
        data = json.load(f)

    # The vocabulary must cover characters from both prompts and answers.
    all_texts = [item['input'] + item['output'] for item in data]
    tokenizer = CharTokenizer(all_texts)

    dataset = MathDataset("dataset.json", tokenizer)
    loader = DataLoader(dataset, batch_size=2, shuffle=True)

    device = "cpu"
    model = MathTransformer(vocab_size=tokenizer.vocab_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()

    # FIX: explicitly enable training mode (dropout/batchnorm behave
    # differently in eval mode); the original never called this.
    model.train()

    for epoch in range(10):
        total_loss = 0.0
        num_batches = 0
        for batch in loader:
            # Assumes MathDataset yields {'input': Tensor, 'output': Tensor}
            # per item — the original fed these straight into the model.
            x = batch['input'].to(device)
            y = batch['output'].to(device)

            optimizer.zero_grad()
            outputs = model(x)
            # Flatten (batch, seq, vocab) -> (batch*seq, vocab) so
            # CrossEntropyLoss sees one prediction per token position.
            loss = criterion(outputs.view(-1, tokenizer.vocab_size), y.view(-1))
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        # FIX: report the epoch-average loss; the original printed only the
        # last batch's loss, which is a noisy and misleading signal.
        avg_loss = total_loss / max(num_batches, 1)
        print(f"Epoch {epoch+1}, Loss: {avg_loss}")


if __name__ == "__main__":
    # Guard so importing this module does not trigger a training run.
    main()