Dabs committed on
Commit
cb8043e
1 Parent(s): ba20c12

first commit

Browse files
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ venv/
2
+ __pycache__/
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import models
3
+ import torch
4
+ import torchvision.transforms as transforms
5
+ import cv2
6
+ import numpy as np
7
+
8
+
9
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Build the architecture only; the weights come from the checkpoint below,
# so no pretrained download is requested.
model = models.model(pretrained=False, requires_grad=False).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved on a CUDA machine still loads on a CPU-only host.
# NOTE(review): the training script stores the criterion object under 'loss'
# in its checkpoint; recent PyTorch releases default torch.load to
# weights_only=True and may reject such a file -- confirm against the
# installed torch version.
checkpoint = torch.load('model.pth', map_location=device)
# Only the model weights are needed for inference.
model.load_state_dict(checkpoint['model_state_dict'])
# Switch to evaluation mode before serving predictions.
model.eval()

# Convert an HxWxC array to a PIL image and then to a CxHxW float tensor.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
])

# Genre names, in the order of the model's 25 output units.
genres = ['Action', 'Adventure', 'Animation', 'Biography', 'Comedy', 'Crime',
          'Documentary', 'Drama', 'Family', 'Fantasy', 'History', 'Horror', 'Music',
          'Musical', 'Mystery', 'N/A', 'News', 'Reality-TV', 'Romance', 'Sci-Fi', 'Short',
          'Sport', 'Thriller', 'War', 'Western']
28
+
29
+
30
def segment(image):
    """Predict per-genre probabilities for a poster image.

    Args:
        image: The poster as an array-like of pixels, as delivered by the
            Gradio "image" input.

    Returns:
        dict: Maps each name in ``genres`` to the sigmoid score of the
        matching model output.

    Raises:
        ValueError: If no image was supplied.
    """
    if image is None:
        raise ValueError("no image provided")
    pixels = np.asarray(image)
    # Gradio hands over the image in RGB order, which is what the model saw
    # during training (cv2 BGR frames converted to RGB). Applying a BGR->RGB
    # swap here would reverse the channels, so none is applied.
    batch = transform(pixels).unsqueeze(0).to(device)
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = torch.sigmoid(model(batch))
    scores = outputs.cpu().tolist()[0]
    return dict(zip(genres, scores))
44
+
45
# Keep a handle on the Interface itself (previously `iface` was bound to the
# return value of .launch()), then start the web app.
iface = gr.Interface(
    fn=segment,
    inputs="image",
    outputs="label",
    title="Poster classification",
    description="classify the genre of your poster by uploading an image",
    examples=[
        ["imagenes/tt0084058.jpg"],
        ["imagenes/tt0084867.jpg"],
        ["imagenes/tt0085121.jpg"],
    ],
)
iface.launch()
imagenes/tt0084058.jpg ADDED
imagenes/tt0084867.jpg ADDED
imagenes/tt0085121.jpg ADDED
model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f5ec1109b461ee19772daeec5c78c3af9cb28f42854b81338f2ab7ef8d0e52d
3
+ size 94965817
models.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torchvision import models as models
2
+ import torch.nn as nn
3
def model(pretrained, requires_grad, num_classes=25):
    """Build a ResNet-50 whose final layer is replaced for genre prediction.

    Args:
        pretrained: Forwarded to ``models.resnet50`` to select pretrained
            weights.
        requires_grad: Truthy to leave the existing ResNet layers trainable,
            falsy to freeze them.
        num_classes: Size of the new output layer (25 by default).

    Returns:
        The ResNet-50 module with a fresh ``nn.Linear`` head.
    """
    network = models.resnet50(progress=True, pretrained=pretrained)
    # Freeze or unfreeze every existing layer in one pass.
    trainable = bool(requires_grad)
    for param in network.parameters():
        param.requires_grad = trainable
    # The replacement head is created after the loop, so it is always
    # learnable; its input width is read from the layer it replaces.
    network.fc = nn.Linear(network.fc.in_features, num_classes)
    return network
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ pandas
3
+ matplotlib
4
+ tqdm
5
+ opencv-python
6
+ torchvision
7
+ gradio
8
+ jinja2
training/dataset.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import cv2
3
+ import numpy as np
4
+ import torchvision.transforms as transforms
5
+ from torch.utils.data import Dataset
6
+
7
+
8
class ImageDataset(Dataset):
    """Poster images and multi-hot genre labels, split by row position.

    The label table must have an ``Id`` column (image file stem), a ``Genre``
    column, and one column per genre. Rows are split positionally:

    * train: the first 85% of rows (with flip/rotation augmentation),
    * validation: the remaining rows except the last ``_TEST_SIZE``,
    * test: the last ``_TEST_SIZE`` rows.

    Args:
        csv: The label table (a pandas DataFrame).
        train: Truthy to select the training split.
        test: When ``train`` is falsy, truthy selects the test split and
            falsy selects the validation split.
        image_dir: Directory holding ``<Id>.jpg`` files.
    """

    # Number of trailing rows reserved for the stand-alone inference script.
    _TEST_SIZE = 10

    def __init__(self, csv, train, test,
                 image_dir="../input/movie-classifier/Multi_Label_dataset/Images"):
        self.csv = csv
        self.train = train
        self.test = test
        self.image_dir = image_dir
        self.all_image_names = self.csv['Id']
        # Everything except the id and the raw genre string is a label column.
        self.all_labels = np.array(self.csv.drop(['Id', 'Genre'], axis=1))
        self.train_ratio = int(0.85 * len(self.csv))
        self.valid_ratio = len(self.csv) - self.train_ratio

        if self.train:
            print(f"Number of training images: {self.train_ratio}")
            self.image_names = list(self.all_image_names[:self.train_ratio])
            self.labels = list(self.all_labels[:self.train_ratio])
            self.transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((400, 400)),
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.RandomRotation(degrees=45),
                transforms.ToTensor(),
            ])
        elif not self.test:
            # Names and labels use the same slice so they stay aligned and
            # the reported count matches what __len__ returns.
            valid_slice = slice(-self.valid_ratio, -self._TEST_SIZE)
            self.image_names = list(self.all_image_names[valid_slice])
            self.labels = list(self.all_labels[valid_slice])
            print(f"Number of validation images: {len(self.image_names)}")
            self.transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((400, 400)),
                transforms.ToTensor(),
            ])
        else:
            # Test split: only the last few rows, without resizing.
            self.image_names = list(self.all_image_names[-self._TEST_SIZE:])
            self.labels = list(self.all_labels[-self._TEST_SIZE:])
            self.transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.ToTensor(),
            ])

    def __len__(self):
        """Return the number of images in the selected split."""
        return len(self.image_names)

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            dict: ``'image'`` is the transformed float32 image tensor and
            ``'label'`` is the float32 label vector for that row.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        image_path = f"{self.image_dir}/{self.image_names[index]}.jpg"
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError(f"could not read image: {image_path}")
        # OpenCV loads BGR; the rest of the pipeline works in RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = self.transform(image)
        targets = self.labels[index]

        return {
            # The transform already yields a tensor; only the dtype is enforced.
            'image': image.to(torch.float32),
            'label': torch.tensor(targets, dtype=torch.float32),
        }
training/engine.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from tqdm import tqdm
3
+
4
+ # training function
5
def train(model, dataloader, optimizer, criterion, train_data, device):
    """Run one training epoch and return the mean loss per batch.

    Args:
        model: Network to optimise; put into training mode here.
        dataloader: Yields dicts with ``'image'`` and ``'label'`` tensors.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss applied to the sigmoid of the model outputs.
        train_data: Unused; kept so existing callers keep working.
        device: Device the batches are moved to.

    Returns:
        float: Sum of the batch losses divided by the number of batches.

    Raises:
        ZeroDivisionError: If the dataloader yields no batches.
    """
    print('Training')
    model.train()
    batches_seen = 0
    running_loss = 0.0
    # len(dataloader) is the real batch count, including a partial last
    # batch, so the progress bar total is exact.
    for batch in tqdm(dataloader, total=len(dataloader)):
        batches_seen += 1
        inputs, target = batch['image'].to(device), batch['label'].to(device)
        optimizer.zero_grad()
        # Squash the raw outputs into [0, 1] before the loss is applied.
        outputs = torch.sigmoid(model(inputs))
        loss = criterion(outputs, target)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()

    return running_loss / batches_seen
26
+
27
+ # validation function
28
def validate(model, dataloader, criterion, val_data, device):
    """Evaluate the model on one pass of the dataloader and return the mean loss.

    Args:
        model: Network to evaluate; put into evaluation mode here.
        dataloader: Yields dicts with ``'image'`` and ``'label'`` tensors.
        criterion: Loss applied to the sigmoid of the model outputs.
        val_data: Unused; kept so existing callers keep working.
        device: Device the batches are moved to.

    Returns:
        float: Sum of the batch losses divided by the number of batches.

    Raises:
        ZeroDivisionError: If the dataloader yields no batches.
    """
    print('Validating')
    model.eval()
    batches_seen = 0
    running_loss = 0.0
    with torch.no_grad():
        # len(dataloader) is the real batch count, including a partial last
        # batch, so the progress bar total is exact.
        for batch in tqdm(dataloader, total=len(dataloader)):
            batches_seen += 1
            inputs, target = batch['image'].to(device), batch['label'].to(device)
            # Squash the raw outputs into [0, 1] before the loss is applied.
            outputs = torch.sigmoid(model(inputs))
            loss = criterion(outputs, target)
            running_loss += loss.item()

    return running_loss / batches_seen
training/inference.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import models
2
+ import torch
3
+ import numpy as np
4
+ import pandas as pd
5
+ import matplotlib.pyplot as plt
6
+ from dataset import ImageDataset
7
+ from torch.utils.data import DataLoader
8
+
9
+
10
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Build the architecture only; the weights come from the checkpoint below.
model = models.model(pretrained=False, requires_grad=False).to(device)
# map_location lets a checkpoint saved on a CUDA machine load on a CPU host.
checkpoint = torch.load('../outputs/model.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

train_csv = pd.read_csv('../input/movie-classifier/Multi_Label_dataset/train.csv')
# The first two columns are skipped; the remaining column names are the genres.
genres = train_csv.columns.values[2:]
print(genres)
# The test split of ImageDataset holds the last rows of the table.
test_data = ImageDataset(
    train_csv, train=False, test=True
)
test_loader = DataLoader(
    test_data,
    batch_size=1,
    shuffle=False
)

for counter, data in enumerate(test_loader):
    image, target = data['image'].to(device), data['label']
    # Index positions of the ground-truth genres (label value == 1).
    target_indices = [i for i, flag in enumerate(target[0]) if flag == 1]
    print(image.shape)
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = torch.sigmoid(model(image))
    outputs = outputs.cpu()
    # argsort is ascending, so the last three entries are the top-3 scores.
    sorted_indices = np.argsort(outputs[0].numpy())
    best = sorted_indices[-3:]
    string_predicted = ''.join(f"{genres[i]} " for i in best)
    string_actual = ''.join(f"{genres[i]} " for i in target_indices)
    # Back to HxWxC on the CPU for matplotlib.
    image = image.squeeze(0).cpu().numpy()
    image = np.transpose(image, (1, 2, 0))
    plt.imshow(image)
    plt.axis('off')
    plt.title(f"PREDICTED: {string_predicted}\nACTUAL: {string_actual}")
    plt.savefig(f"../outputs/inference_{counter}.jpg")
    plt.show()
    # Start the next poster on a fresh figure instead of drawing over this one.
    plt.close()
training/models.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torchvision import models as models
2
+ import torch.nn as nn
3
def model(pretrained, requires_grad, num_classes=25):
    """Build a ResNet-50 whose final layer is replaced for genre prediction.

    Args:
        pretrained: Forwarded to ``models.resnet50`` to select pretrained
            weights.
        requires_grad: Truthy to leave the existing ResNet layers trainable,
            falsy to freeze them.
        num_classes: Size of the new output layer (25 by default).

    Returns:
        The ResNet-50 module with a fresh ``nn.Linear`` head.
    """
    network = models.resnet50(progress=True, pretrained=pretrained)
    # Freeze or unfreeze every existing layer in one pass.
    trainable = bool(requires_grad)
    for param in network.parameters():
        param.requires_grad = trainable
    # The replacement head is created after the loop, so it is always
    # learnable; its input width is read from the layer it replaces.
    network.fc = nn.Linear(network.fc.in_features, num_classes)
    return network
training/predict_single.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import models
2
+ import torch
3
+ import torchvision.transforms as transforms
4
+ import cv2
5
+
6
+
7
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Build the architecture only; the weights come from the checkpoint below.
model = models.model(pretrained=False, requires_grad=False).to(device)
# map_location lets a checkpoint saved on a CUDA machine load on a CPU host.
checkpoint = torch.load('../outputs/model.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# Convert an HxWxC array to a PIL image and then to a CxHxW float tensor.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
])

# Genre names, in the order of the model's 25 output units.
genres = ['Action', 'Adventure', 'Animation', 'Biography', 'Comedy', 'Crime',
          'Documentary', 'Drama', 'Family', 'Fantasy', 'History', 'Horror', 'Music',
          'Musical', 'Mystery', 'N/A', 'News', 'Reality-TV', 'Romance', 'Sci-Fi', 'Short',
          'Sport', 'Thriller', 'War', 'Western']


image_path = "../input/movie-classifier/Multi_Label_dataset/Images/tt0084058.jpg"
image = cv2.imread(image_path)
# cv2.imread signals failure by returning None instead of raising.
if image is None:
    raise FileNotFoundError(f"could not read image: {image_path}")
# OpenCV loads BGR; the model expects RGB.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Transform, add the batch dimension and move to the compute device.
image = transform(image).unsqueeze(0).to(device)
# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    outputs = torch.sigmoid(model(image))
outputs = outputs.cpu()

out_dict = dict(zip(genres, outputs.tolist()[0]))
print(out_dict)
42
+
training/train.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import models
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.optim as optim
5
+ import pandas as pd
6
+ import matplotlib.pyplot as plt
7
+ import matplotlib
8
+ from engine import train, validate
9
+ from dataset import ImageDataset
10
+ from torch.utils.data import DataLoader
11
import os

matplotlib.style.use('ggplot')
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# Create the output directory up front: the checkpoint and the loss plot are
# written only after training, and a missing directory would lose the run.
os.makedirs('../outputs', exist_ok=True)

# Pretrained ResNet-50; the existing layers are frozen by models.model.
model = models.model(pretrained=True, requires_grad=False).to(device)
# learning parameters
lr = 0.0001
epochs = 10
batch_size = 32
optimizer = optim.Adam(model.parameters(), lr=lr)
# BCELoss expects probabilities; engine.train/validate apply the sigmoid.
criterion = nn.BCELoss()

# read the training csv file
train_csv = pd.read_csv('../input/movie-classifier/Multi_Label_dataset/train.csv')
# train dataset
train_data = ImageDataset(
    train_csv, train=True, test=False
)
# validation dataset
valid_data = ImageDataset(
    train_csv, train=False, test=False
)
# train data loader
train_loader = DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True
)
# validation data loader
valid_loader = DataLoader(
    valid_data,
    batch_size=batch_size,
    shuffle=False
)

# Per-epoch mean losses, collected for the plot below.
train_loss = []
valid_loss = []
for epoch in range(epochs):
    print(f"Epoch {epoch+1} of {epochs}")
    train_epoch_loss = train(
        model, train_loader, optimizer, criterion, train_data, device
    )
    valid_epoch_loss = validate(
        model, valid_loader, criterion, valid_data, device
    )
    train_loss.append(train_epoch_loss)
    valid_loss.append(valid_epoch_loss)
    print(f"Train Loss: {train_epoch_loss:.4f}")
    print(f'Val Loss: {valid_epoch_loss:.4f}')

# save the trained model to disk
# NOTE(review): 'loss' stores the criterion object, not a loss value. Pickling
# a module instance may prevent loading this file with
# torch.load(weights_only=True) -- confirm whether any consumer relies on it.
torch.save({
    'epoch': epochs,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': criterion,
}, '../outputs/model.pth')
# plot and save the train and validation line graphs
plt.figure(figsize=(10, 7))
plt.plot(train_loss, color='orange', label='train loss')
plt.plot(valid_loss, color='red', label='validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig('../outputs/loss.png')
plt.show()