biplab2008 commited on
Commit
dee4edb
1 Parent(s): dc7407d

single folder data fetching

Browse files
Honey_Regression_Inference.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
loaders_viscosity.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ from PIL import Image
4
+ from torch.utils.data import DataLoader, Dataset
5
+ from torch.utils.data.sampler import SubsetRandomSampler
6
+ import torch
7
+ import torchvision.transforms as transforms
8
+ from tqdm import tqdm
9
+ from sklearn.model_selection import train_test_split
10
+ from typing import List, Optional
11
+ from glob import glob
12
+
13
+
14
# dataset class for 3DCNN
class Dataset_3DCNN(Dataset):
    """Video-frame dataset for a 3D CNN.

    Each sample is a stack of grayscale frames read from one sub-folder of
    ``path``; the frame files are expected to be named ``frame_<i>.jpg`` for
    every index ``i`` in ``frames``.
    """

    def __init__(self,
                 path : str,
                 folders : List[str],
                 labels : List[int],
                 frames : List[int],
                 transform : Optional[transforms.Compose] = None):
        """Initialization.

        Args:
            path: root directory containing one sub-folder per sample.
            folders: sub-folder names, one per sample.
            labels: integer label per sample. Annotated ``List[int]`` (the
                original said ``List[float]``) because ``__getitem__`` wraps
                the value in ``torch.LongTensor`` (int64).
            frames: frame indices to load from every folder.
            transform: optional transform applied to each PIL frame; when
                ``None``, a plain ``transforms.ToTensor()`` is used instead.
        """
        self.path = path
        self.labels = labels
        self.folders = folders
        self.transform = transform
        self.frames = frames

    def __len__(self):
        "Denotes the total number of samples"
        return len(self.folders)

    def read_images(self, path, selected_folder, use_transform):
        """Load and stack the selected frames of one folder as (T, H, W)."""
        X = []
        for i in self.frames:
            # frames are stored as jpegs; convert('L') forces single-channel grayscale
            image = Image.open(os.path.join(path, selected_folder, 'frame_{:01d}.jpg'.format(i))).convert('L')

            if use_transform is not None:
                image = use_transform(image)
            else:
                image = transforms.ToTensor()(image)

            X.append(image.squeeze_(0))  # drop the channel dim in place -> (H, W)
        X = torch.stack(X, dim=0)  # (num_frames, H, W)

        return X

    def __getitem__(self, index):
        "Generates one sample of data"
        # Select sample
        folder = self.folders[index]

        # Load data; unsqueeze_ adds a leading channel axis -> (1, T, H, W)
        X = self.read_images(self.path, folder, self.transform).unsqueeze_(0)  # (input) spatial images
        y = torch.LongTensor([self.labels[index]])  # (labels) LongTensor are for int64 instead of FloatTensor

        # print(X.shape)
        return X, y
60
+
61
+
62
+
63
+
64
# train test splitting
def create_datasets(path : str = r'D:\All_files\pys\AI_algos\Mikes_Work\viscosity-video-classification\code_digdiscovery\new_honey_164', # absolute path
                    validation_split : float = 0.2,
                    test_split : float = 0.2,
                    batch_size : int = 32,
                    transform : transforms.Compose = transforms.Compose([transforms.Resize([256, 342]),
                                                                         transforms.ToTensor(),
                                                                         transforms.Normalize(mean=[0.5], std=[0.5])]),
                    random_seed : int = 112,
                    shuffle : bool = True,
                    selected_frames : Optional[List[int]] = None):
    """Build train/validation/test DataLoaders from a folder-per-sample dataset.

    Every sub-folder of ``path`` is one video sample and its (numeric) folder
    name is used as the integer label. The data is first split into train/test
    with ``train_test_split``, then the train portion is further split into
    train/validation index samplers.

    Args:
        path: root directory containing one numerically-named folder per sample.
        validation_split: fraction of the *training* samples used for validation.
        test_split: fraction of all samples held out as the test set.
        batch_size: batch size for every loader.
        transform: torchvision transform applied to each frame.
        random_seed: seed for both the train/test split and index shuffling.
        shuffle: whether to shuffle indices before the train/validation split.
        selected_frames: frame indices loaded per sample; defaults to
            ``[0, 10, 20]`` (``None`` sentinel avoids a shared mutable default).

    Returns:
        (train_loader, test_loader, valid_loader) — note the order.
    """
    if selected_frames is None:
        selected_frames = [0, 10, 20]

    # List the directory ONCE and derive labels from the same listing, so the
    # folder list and label list can never get out of sync.
    all_X_list = os.listdir(path)
    all_y_list = [int(filename) for filename in all_X_list]

    # train, test split
    train_list, test_list, train_label, test_label = train_test_split(all_X_list, all_y_list, test_size=test_split, random_state=random_seed)

    train_set, test_set = Dataset_3DCNN(path, train_list, train_label, selected_frames, transform=transform), \
                          Dataset_3DCNN(path, test_list, test_label, selected_frames, transform=transform)
    print('length test set ', len(test_set))

    # split into training and validation batches
    num_train = len(train_list)
    indices = list(range(num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    split = int(np.floor(validation_split * num_train))
    train_idx, valid_idx = indices[split:], indices[:split]

    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    # loading train, validation and test data; train and validation loaders
    # share the same underlying dataset but draw disjoint index subsets
    train_loader = DataLoader(train_set,
                              batch_size=batch_size,
                              sampler=train_sampler,
                              num_workers=0)
    valid_loader = DataLoader(train_set,
                              batch_size=batch_size,
                              sampler=valid_sampler,
                              num_workers=0)
    test_loader = DataLoader(test_set,
                             batch_size=batch_size,
                             num_workers=0)

    return train_loader, test_loader, valid_loader
121
+
122
+
123
+
124
+
125
def fetch_data_single_folder(path : str = r'C:\Users\bdutta\work\pys\AI_algos\viscosity\new_honey_164\2350',
                             frames : Optional[np.ndarray] = None,
                             use_transform : Optional[transforms.Compose] = transforms.Compose([transforms.Resize([256, 342]),
                                                                                               transforms.ToTensor(),
                                                                                               transforms.Normalize(mean=[0.5], std=[0.5])])
                             ):
    """Load the frames of one video folder as a single batched tensor.

    Args:
        path: folder containing grayscale frames named ``frame_<i>.jpg``.
        frames: frame indices to read; defaults to ``np.arange(2, 62, 2)``
            (``None`` sentinel avoids sharing one mutable ndarray default
            across calls; the annotation is ``np.ndarray`` — ``np.array``
            is a function, not a type).
        use_transform: transform applied per frame; falls back to
            ``transforms.ToTensor()`` when ``None``.

    Returns:
        Tensor of shape ``(1, C, T, H, W)`` — frames stacked along dim 1,
        then a batch axis prepended.
    """
    if frames is None:
        frames = np.arange(2, 62, 2)

    X = []
    for i in frames:
        image = Image.open(os.path.join(path, 'frame_{:01d}.jpg'.format(i))).convert('L')

        if use_transform is not None:
            image = use_transform(image)
        else:
            image = transforms.ToTensor()(image)
        X.append(image)
    # stack along a new time axis (dim=1), then add a batch axis -> (1, C, T, H, W)
    X = torch.stack(X, dim=1).unsqueeze(0)
    return X
utils/loaders_viscosity.py CHANGED
@@ -117,4 +117,26 @@ def create_datasets(path : str = r'D:\All_files\pys\AI_algos\Mikes_Work\viscosit
117
 
118
 
119
 
120
- return train_loader, test_loader, valid_loader
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
 
119
 
120
+ return train_loader, test_loader, valid_loader
121
+
122
+
123
+
124
+
125
def fetch_data_single_folder(path : str = r'C:\Users\bdutta\work\pys\AI_algos\viscosity\new_honey_164\2350',
                             frames : Optional[np.ndarray] = None,
                             use_transform : Optional[transforms.Compose] = transforms.Compose([transforms.Resize([256, 342]),
                                                                                               transforms.ToTensor(),
                                                                                               transforms.Normalize(mean=[0.5], std=[0.5])])
                             ):
    """Load the frames of one video folder as a single batched tensor.

    Args:
        path: folder containing grayscale frames named ``frame_<i>.jpg``.
        frames: frame indices to read; defaults to ``np.arange(2, 62, 2)``
            (``None`` sentinel avoids sharing one mutable ndarray default
            across calls; the annotation is ``np.ndarray`` — ``np.array``
            is a function, not a type).
        use_transform: transform applied per frame; falls back to
            ``transforms.ToTensor()`` when ``None``.

    Returns:
        Tensor of shape ``(1, C, T, H, W)`` — frames stacked along dim 1,
        then a batch axis prepended.
    """
    if frames is None:
        frames = np.arange(2, 62, 2)

    X = []
    for i in frames:
        image = Image.open(os.path.join(path, 'frame_{:01d}.jpg'.format(i))).convert('L')

        if use_transform is not None:
            image = use_transform(image)
        else:
            image = transforms.ToTensor()(image)
        X.append(image)
    # stack along a new time axis (dim=1), then add a batch axis -> (1, C, T, H, W)
    X = torch.stack(X, dim=1).unsqueeze(0)
    return X