|
import torch |
|
import torchvision |
|
import torchvision.transforms as transforms |
|
import torch.nn as nn |
|
import torch.optim as optim |
|
|
|
|
|
# Load an ImageNet-pretrained ResNet-50 as the base model to fine-tune.
# NOTE(review): the original called torchvision.models.vqa_resnet_finetune,
# which does not exist in torchvision and raises AttributeError at runtime.
# ResNet-50 matches the rest of this script, which replaces the model's
# `.fc` classifier head below.
base_model = torchvision.models.resnet50(pretrained=True)
|
|
|
|
|
|
|
|
|
# Build the OK-VQA training dataset and its batched loader.
# NOTE(review): OKVQADataset is not imported in this file — it must be
# defined or imported elsewhere (TODO confirm). It is presumably a
# torch.utils.data.Dataset that takes a JSON annotation path and a
# torchvision transform, and exposes a `.classes` attribute (used later
# to size the classifier head) — verify against its definition.
train_dataset = OKVQADataset('train_data.json', transform=transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()]))

# Batches of 32, reshuffled every epoch; images are resized to 224x224
# (the standard ResNet input size) and converted to tensors above.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
|
|
|
|
|
# Swap in a fresh classifier head so the network emits one logit per
# answer class present in the training dataset.
num_classes = len(train_dataset.classes)
in_features = base_model.fc.in_features
base_model.fc = nn.Linear(in_features, num_classes)
|
|
|
|
|
# Pick the compute device — GPU when available, CPU otherwise — and
# move every model parameter onto it before training starts.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
base_model = base_model.to(device)
|
|
|
|
|
# Standard multi-class classification loss over the answer vocabulary.
criterion = nn.CrossEntropyLoss()

# Adam with a 1e-3 learning rate updates every parameter of the model,
# including the freshly initialized classifier head.
optimizer = optim.Adam(base_model.parameters(), lr=1e-3)
|
|
|
|
|
# Fine-tune the model for a fixed number of epochs over the training split.
num_epochs = 10
for epoch in range(num_epochs):
    base_model.train()  # ensure batchnorm/dropout are in training mode
    running_loss = 0.0
    num_batches = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()           # clear gradients from the previous step
        outputs = base_model(inputs)    # forward pass -> (batch, num_classes) logits
        loss = criterion(outputs, labels)
        loss.backward()                 # backpropagate
        optimizer.step()                # apply the parameter update

        running_loss += loss.item()
        num_batches += 1

    # Report the mean loss over the whole epoch; the original printed only
    # the final batch's loss, which is noisy and unrepresentative of the
    # epoch. max(..., 1) guards against an empty loader.
    avg_loss = running_loss / max(num_batches, 1)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss}')

# Persist only the learned weights (state_dict), not the pickled module.
torch.save(base_model.state_dict(), 'git-vqa-finetuned-on-ok-vqa.pth')
|
|