import os

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
from datasets import load_dataset

from utils.preprocessing import get_transforms
from src.dataset import HumanActionDataset
from models.resnet_model import ResNet18


def train_model():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Load dataset from Hugging Face
    ds = load_dataset("Bingsu/Human_Action_Recognition")

    # Wrap the train split and apply transforms
    transform = get_transforms()
    full_dataset = HumanActionDataset(ds["train"], transform=transform)

    # Split train into train/val (90% train, 10% val)
    train_size = int(0.9 * len(full_dataset))
    val_size = len(full_dataset) - train_size
    train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

    # Batch size 32: a reasonable balance between speed and generalization on CPU
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)

    # Initialize model, loss, and optimizer
    model = ResNet18(num_classes=15).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # ReduceLROnPlateau without the deprecated `verbose` argument
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=2
    )

    best_val_acc = 0.0
    epochs = 10

    for epoch in range(epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0

        loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} Training")
        for images, labels in loop:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            loop.set_postfix(loss=loss.item(), acc=correct / total)

        train_loss /= total
        train_acc = correct / total

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)

                val_loss += loss.item() * images.size(0)
                _, predicted = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_loss /= val_total
        val_acc = val_correct / val_total

        print(f"Epoch {epoch+1}/{epochs} | "
              f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | "
              f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

        # Step the scheduler on validation loss
        scheduler.step(val_loss)

        # Save the best model by validation accuracy
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            os.makedirs("models", exist_ok=True)
            torch.save(model.state_dict(), "models/best_model.pth")
            print("Saved best model.")


if __name__ == "__main__":
    train_model()
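

# ---------------------------------------------------------------------------
# Sketch of the helper modules imported above.
# utils.preprocessing, src.dataset, and models.resnet_model are not shown in
# this file; the definitions below are a minimal, hypothetical version of what
# they might contain, assuming the Hugging Face split yields dicts with an
# "image" key (PIL image) and a "labels" key (integer class index), and that
# ResNet18 wraps torchvision's resnet18 with a 15-way classification head.
# They are illustrative sketches, not the project's actual implementations.
# ---------------------------------------------------------------------------
import torch.nn as nn
from torch.utils.data import Dataset
from torchvision import models, transforms


def get_transforms():
    # Assumed preprocessing: resize to a fixed input size, convert to tensor,
    # and normalize with ImageNet statistics (matching a pretrained backbone).
    return transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])


class HumanActionDataset(Dataset):
    """Hypothetical wrapper adapting a Hugging Face split to (image, label) pairs."""

    def __init__(self, hf_split, transform=None):
        self.data = hf_split
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        image = item["image"].convert("RGB")   # assumed column name "image"
        label = item["labels"]                 # assumed column name "labels"
        if self.transform:
            image = self.transform(image)
        return image, label


class ResNet18(nn.Module):
    """Hypothetical model: torchvision resnet18 with a replaced final layer."""

    def __init__(self, num_classes=15):
        super().__init__()
        self.backbone = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        self.backbone.fc = nn.Linear(self.backbone.fc.in_features, num_classes)

    def forward(self, x):
        return self.backbone(x)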