somethingbyai committed on
Commit
5801159
1 Parent(s): b5d950b

Create MyDataSet.py

Browse files
Files changed (1) hide show
  1. MyDataSet.py +409 -0
MyDataSet.py ADDED
@@ -0,0 +1,409 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas
2
+ from torchvision.transforms import transforms
3
+ from torch.utils.data import Dataset
4
+ from torchvision import datasets
5
+ import torch
6
+ import numpy as np
7
+
8
+
9
class MyDataSets:
    """MNIST data sets and loaders, full and restricted to selected digits.

    Attributes set by ``__init__``:
        dataset_train_full / dataset_test_full: torchvision MNIST splits
            (``ToTensor`` transform, downloaded on demand).
        dataloader_train_full: shuffled loader over the training split with
            ``batch_size_train`` samples per batch.
        dataloader_test_full: shuffled loader over the test split with
            ``batch_size_test`` samples per batch.
        test_subset_full: the first ``batch_size_test`` test samples.
        train_loader_subset / test_loader_subset: shuffled loaders over the
            samples whose label is in ``tuble``; each yields the whole
            subset as a single batch.
        train_loader_subset_size / test_loader_subset_size: number of
            samples in the respective subset.
    """

    def __init__(self, tuble=(4, 9), batch_size_train=16, batch_size_test=10000):
        """Build all data sets and loaders.

        Args:
            tuble: iterable of digit labels that make up the subset loaders.
            batch_size_train: batch size of ``dataloader_train_full``.
            batch_size_test: batch size of ``dataloader_test_full`` and the
                number of leading test samples kept in ``test_subset_full``.
        """
        print('MyDataSets.MyDataSets.__init__')
        self.batch_size_train = batch_size_train
        self.batch_size_test = batch_size_test
        self.indices_batch_size_test_all = np.arange(batch_size_test)
        print(f'{self.indices_batch_size_test_all}')

        self.dataset_train_full = datasets.MNIST(root='data/dataset', train=True,
                                                 transform=transforms.ToTensor(), download=True)
        self.dataset_test_full = datasets.MNIST(root='data/testset', train=False,
                                                transform=transforms.ToTensor(), download=True)

        self.dataloader_train_full = torch.utils.data.DataLoader(self.dataset_train_full, shuffle=True,
                                                                 batch_size=self.batch_size_train)
        # The test loader must wrap the *test* split (it previously wrapped
        # the training split by mistake).
        self.dataloader_test_full = torch.utils.data.DataLoader(self.dataset_test_full, shuffle=True,
                                                                batch_size=self.batch_size_test)
        self.test_subset_full = torch.utils.data.Subset(self.dataset_test_full,
                                                        self.indices_batch_size_test_all)

        # Nothing here mutates the data sets, so the full splits loaded
        # above are reused for the subsets instead of loading MNIST again.
        train_idx = self._digit_indices(self.dataset_train_full.targets, tuble)
        self.train_loader_subset_size = len(train_idx)
        train_subset = torch.utils.data.Subset(self.dataset_train_full, train_idx)
        self.train_loader_subset = torch.utils.data.DataLoader(train_subset, shuffle=True,
                                                               batch_size=self.train_loader_subset_size)

        test_idx = self._digit_indices(self.dataset_test_full.targets, tuble)
        self.test_loader_subset_size = len(test_idx)
        test_subset = torch.utils.data.Subset(self.dataset_test_full, test_idx)
        self.test_loader_subset = torch.utils.data.DataLoader(test_subset, shuffle=True,
                                                              batch_size=self.test_loader_subset_size)

        print(f'{self.train_loader_subset_size = }')
        print(f'{self.train_loader_subset = }')
        print(f'{self.test_loader_subset_size = }')

    @staticmethod
    def _digit_indices(targets, digits):
        """Return the positions in ``targets`` whose label is in ``digits``.

        Positions are grouped per digit, in the order the digits are given.
        An empty ``digits`` yields an empty index array.
        """
        per_digit = [np.asarray(targets == digit).nonzero()[0] for digit in digits]
        if not per_digit:
            return np.array([], dtype=np.int64)
        return np.hstack(per_digit)

    def for_plotting_dataloader_test_full(self):
        """Return the first batch drawn from ``dataloader_test_full``."""
        return next(iter(self.dataloader_test_full))
57
+
58
+
59
class MyDataSets_Subset:
    """MNIST loaders for the full data and for the digits 4 and 9.

    Unlike the full-batch subset loader of ``train_loader_subset`` in other
    variants, the training subset loader here uses ``batch_size_train``
    samples per batch; the test subset loader yields the whole test subset
    as one batch. Original MNIST labels (4 and 9) are kept.
    """

    # Digit labels that make up the subset loaders.
    _DIGITS = (4, 9)

    def __init__(self, batch_size_train=32, batch_size_test=10000):
        """Build all data sets and loaders.

        Args:
            batch_size_train: batch size of ``dataloader_train_full`` and of
                ``train_loader_subset``.
            batch_size_test: batch size of ``dataloader_test_full`` and the
                number of leading test samples kept in ``test_subset_full``.
        """
        print('MyDataSets.MyDataSets_Subset.__init__')
        self.batch_size_train = batch_size_train
        self.batch_size_test = batch_size_test
        self.indices_batch_size_test_all = np.arange(batch_size_test)
        print(f'{self.indices_batch_size_test_all}')

        self.dataset_train_full = datasets.MNIST(root='data/dataset', train=True,
                                                 transform=transforms.ToTensor(), download=True)
        self.dataset_test_full = datasets.MNIST(root='data/testset', train=False,
                                                transform=transforms.ToTensor(), download=True)

        self.dataloader_train_full = torch.utils.data.DataLoader(self.dataset_train_full, shuffle=True,
                                                                 batch_size=self.batch_size_train)
        # The test loader must wrap the *test* split (it previously wrapped
        # the training split by mistake).
        self.dataloader_test_full = torch.utils.data.DataLoader(self.dataset_test_full, shuffle=True,
                                                                batch_size=self.batch_size_test)
        self.test_subset_full = torch.utils.data.Subset(self.dataset_test_full,
                                                        self.indices_batch_size_test_all)

        # Labels are not modified, so the splits loaded above are reused
        # for the subsets instead of loading MNIST a second time.
        train_idx = self._digit_indices(self.dataset_train_full.targets, self._DIGITS)
        self.train_loader_subset_size = len(train_idx)
        train_subset = torch.utils.data.Subset(self.dataset_train_full, train_idx)
        self.train_loader_subset = torch.utils.data.DataLoader(train_subset, shuffle=True,
                                                               batch_size=batch_size_train)

        test_idx = self._digit_indices(self.dataset_test_full.targets, self._DIGITS)
        self.test_loader_subset_size = len(test_idx)
        test_subset = torch.utils.data.Subset(self.dataset_test_full, test_idx)
        self.test_loader_subset = torch.utils.data.DataLoader(test_subset, shuffle=True,
                                                              batch_size=self.test_loader_subset_size)

        print(f'{self.train_loader_subset_size = }')
        print(f'{self.train_loader_subset = }')
        print(f'{self.test_loader_subset_size = }')

    @staticmethod
    def _digit_indices(targets, digits):
        """Return the positions in ``targets`` whose label is in ``digits``.

        Positions are grouped per digit, in the order the digits are given.
        """
        per_digit = [np.asarray(targets == digit).nonzero()[0] for digit in digits]
        if not per_digit:
            return np.array([], dtype=np.int64)
        return np.hstack(per_digit)

    def for_plotting_dataloader_test_full(self):
        """Return the first batch drawn from ``dataloader_test_full``."""
        return next(iter(self.dataloader_test_full))

    def dataloader_train_subset(self):
        """Return the loader over the training subset."""
        return self.train_loader_subset

    def dataloader_train_subset_one_batch(self):
        """Return the first batch drawn from the training subset loader."""
        return next(iter(self.train_loader_subset))

    def dataloader_test_subset(self):
        """Return the loader over the test subset."""
        return self.test_loader_subset

    def dataloader_test_subset_one_batch(self):
        """Return the first batch drawn from the test subset loader."""
        return next(iter(self.test_loader_subset))
126
+
127
+
128
class MyDataSets_Subset_4_9:
    """MNIST digits 4 and 9 only, relabelled as classes 0 and 1.

    The training loader is shuffled; the test loader is not shuffled and
    yields the whole test subset as a single batch.
    """

    # (original MNIST label, new class label) pairs.
    _LABEL_MAP = ((4, 0), (9, 1))

    def __init__(self, batch_size_train=32, batch_size_test=10000):
        """Load MNIST, relabel the selected digits and build the loaders.

        Args:
            batch_size_train: batch size of the training subset loader;
                ``-1`` means "the whole training subset in one batch".
            batch_size_test: stored on the instance only; the test loader
                always uses the full test subset as its batch size.
        """
        print('MyDataSets.MyDataSets_Subset_4_9.__init__')
        self.batch_size_test = batch_size_test

        trainset = datasets.MNIST(root='data/dataset', train=True,
                                  transform=transforms.ToTensor(), download=True)
        testset = datasets.MNIST(root='data/testset', train=False,
                                 transform=transforms.ToTensor(), download=True)

        # TRAIN
        train_idx = self._relabel_and_select(trainset.targets)
        self.train_loader_subset_changed_labels_size = len(train_idx)
        if batch_size_train == -1:
            batch_size_train = self.train_loader_subset_changed_labels_size
        # Store the batch size actually used, not the -1 sentinel.
        self.batch_size_train = batch_size_train
        train_subset = torch.utils.data.Subset(trainset, train_idx)
        self.train_loader_subset_changed_labels = torch.utils.data.DataLoader(
            train_subset, shuffle=True, batch_size=batch_size_train)

        # TEST
        test_idx = self._relabel_and_select(testset.targets)
        self.test_loader_subset_changed_labels_size = len(test_idx)
        test_subset = torch.utils.data.Subset(testset, test_idx)
        self.test_loader_subset_changed_labels = torch.utils.data.DataLoader(
            test_subset, shuffle=False, batch_size=self.test_loader_subset_changed_labels_size)

    @classmethod
    def _relabel_and_select(cls, targets):
        """Rewrite labels in ``targets`` in place and return the hit positions.

        All positions are located before any label is rewritten, so a new
        label can never be mistaken for an original one. Positions are
        grouped per original digit, in ``_LABEL_MAP`` order.
        """
        hits = [np.asarray(targets == old).nonzero() for old, _ in cls._LABEL_MAP]
        for (_, new), hit in zip(cls._LABEL_MAP, hits):
            targets[hit] = new
        return np.hstack([hit[0] for hit in hits])

    def for_plotting_dataloader_test_full(self):
        """Return the first batch drawn from the relabelled test subset loader."""
        return next(iter(self.test_loader_subset_changed_labels))

    def dataloader_train_subset(self):
        """Return the loader over the relabelled training subset."""
        return self.train_loader_subset_changed_labels

    def dataloader_train_subset_one_batch(self):
        """Return the first batch drawn from the training subset loader."""
        return next(iter(self.dataloader_train_subset()))

    def dataloader_test_subset(self):
        """Return the loader over the relabelled test subset."""
        return self.test_loader_subset_changed_labels

    def dataloader_test_subset_one_batch(self):
        """Return the first batch drawn from the test subset loader."""
        return next(iter(self.dataloader_test_subset()))
201
+
202
+
203
class MyDataSets_Subset_4:
    """MNIST digit 4 only, relabelled as class 0.

    Both loaders are shuffled; the test loader yields the whole test subset
    as a single batch.
    """

    # (original MNIST label, new class label) pairs.
    _LABEL_MAP = ((4, 0),)

    def __init__(self, batch_size_train=32, batch_size_test=10000):
        """Load MNIST, relabel the selected digit and build the loaders.

        Args:
            batch_size_train: batch size of the training subset loader;
                ``-1`` means "the whole training subset in one batch"
                (same convention as ``MyDataSets_Subset_4_9``).
            batch_size_test: stored on the instance only; the test loader
                always uses the full test subset as its batch size.
        """
        print('MyDataSets.MyDataSets_Subset_4.__init__')
        self.batch_size_test = batch_size_test

        trainset = datasets.MNIST(root='data/dataset', train=True,
                                  transform=transforms.ToTensor(), download=True)
        testset = datasets.MNIST(root='data/testset', train=False,
                                 transform=transforms.ToTensor(), download=True)

        # TRAIN -- change class_nr: 4=>0
        train_idx = self._relabel_and_select(trainset.targets)
        self.train_loader_subset_changed_labels_size = len(train_idx)
        if batch_size_train == -1:
            batch_size_train = self.train_loader_subset_changed_labels_size
        self.batch_size_train = batch_size_train
        train_subset = torch.utils.data.Subset(trainset, train_idx)
        self.train_loader_subset_changed_labels = torch.utils.data.DataLoader(
            train_subset, shuffle=True, batch_size=batch_size_train)

        # TEST -- change class_nr: 4=>0
        test_idx = self._relabel_and_select(testset.targets)
        self.test_loader_subset_changed_labels_size = len(test_idx)
        test_subset = torch.utils.data.Subset(testset, test_idx)
        self.test_loader_subset_changed_labels = torch.utils.data.DataLoader(
            test_subset, shuffle=True, batch_size=self.test_loader_subset_changed_labels_size)

    @classmethod
    def _relabel_and_select(cls, targets):
        """Rewrite labels in ``targets`` in place and return the hit positions.

        All positions are located before any label is rewritten. Positions
        are grouped per original digit, in ``_LABEL_MAP`` order.
        """
        hits = [np.asarray(targets == old).nonzero() for old, _ in cls._LABEL_MAP]
        for (_, new), hit in zip(cls._LABEL_MAP, hits):
            targets[hit] = new
        return np.hstack([hit[0] for hit in hits])

    def for_plotting_dataloader_test_full(self):
        """Return the first batch drawn from the relabelled test subset loader."""
        return next(iter(self.test_loader_subset_changed_labels))

    def dataloader_train_subset(self):
        """Return the loader over the relabelled training subset."""
        return self.train_loader_subset_changed_labels

    def dataloader_train_subset_one_batch(self):
        """Return the first batch drawn from the training subset loader."""
        return next(iter(self.dataloader_train_subset()))

    def dataloader_test_subset(self):
        """Return the loader over the relabelled test subset."""
        return self.test_loader_subset_changed_labels

    def dataloader_test_subset_one_batch(self):
        """Return the first batch drawn from the test subset loader."""
        return next(iter(self.dataloader_test_subset()))
271
+
272
+
273
class MyDataSets_Subset_9:
    """MNIST digit 9 only, relabelled as class 0.

    Both loaders are shuffled; the test loader yields the whole test subset
    as a single batch.
    """

    # (original MNIST label, new class label) pairs.
    _LABEL_MAP = ((9, 0),)

    def __init__(self, batch_size_train=32, batch_size_test=10000):
        """Load MNIST, relabel the selected digit and build the loaders.

        Args:
            batch_size_train: batch size of the training subset loader;
                ``-1`` means "the whole training subset in one batch"
                (same convention as ``MyDataSets_Subset_4_9``).
            batch_size_test: stored on the instance only; the test loader
                always uses the full test subset as its batch size.
        """
        print('MyDataSets.MyDataSets_Subset_9.__init__')
        self.batch_size_test = batch_size_test

        trainset = datasets.MNIST(root='data/dataset', train=True,
                                  transform=transforms.ToTensor(), download=True)
        testset = datasets.MNIST(root='data/testset', train=False,
                                 transform=transforms.ToTensor(), download=True)

        # TRAIN -- change class_nr: 9=>0
        train_idx = self._relabel_and_select(trainset.targets)
        self.train_loader_subset_changed_labels_size = len(train_idx)
        if batch_size_train == -1:
            batch_size_train = self.train_loader_subset_changed_labels_size
        self.batch_size_train = batch_size_train
        train_subset = torch.utils.data.Subset(trainset, train_idx)
        self.train_loader_subset_changed_labels = torch.utils.data.DataLoader(
            train_subset, shuffle=True, batch_size=batch_size_train)

        # TEST -- change class_nr: 9=>0
        test_idx = self._relabel_and_select(testset.targets)
        self.test_loader_subset_changed_labels_size = len(test_idx)
        test_subset = torch.utils.data.Subset(testset, test_idx)
        self.test_loader_subset_changed_labels = torch.utils.data.DataLoader(
            test_subset, shuffle=True, batch_size=self.test_loader_subset_changed_labels_size)

    @classmethod
    def _relabel_and_select(cls, targets):
        """Rewrite labels in ``targets`` in place and return the hit positions.

        All positions are located before any label is rewritten. Positions
        are grouped per original digit, in ``_LABEL_MAP`` order.
        """
        hits = [np.asarray(targets == old).nonzero() for old, _ in cls._LABEL_MAP]
        for (_, new), hit in zip(cls._LABEL_MAP, hits):
            targets[hit] = new
        return np.hstack([hit[0] for hit in hits])

    def for_plotting_dataloader_test_full(self):
        """Return the first batch drawn from the relabelled test subset loader."""
        return next(iter(self.test_loader_subset_changed_labels))

    def dataloader_train_subset(self):
        """Return the loader over the relabelled training subset."""
        return self.train_loader_subset_changed_labels

    def dataloader_train_subset_one_batch(self):
        """Return the first batch drawn from the training subset loader."""
        return next(iter(self.dataloader_train_subset()))

    def dataloader_test_subset(self):
        """Return the loader over the relabelled test subset."""
        return self.test_loader_subset_changed_labels

    def dataloader_test_subset_one_batch(self):
        """Return the first batch drawn from the test subset loader."""
        return next(iter(self.dataloader_test_subset()))
341
+
342
+
343
+ import os
344
+ import pandas as pd
345
+ from torchvision.io import read_image
346
+
347
+
348
class CustomDatasetCSV(Dataset):
    """Image data set described by a CSV annotations file.

    For every row of the CSV, column 0 is joined onto ``img_dir`` to form
    the image path and column 1 is used as the label.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        """Read the annotations and remember where the images live.

        Args:
            annotations_file: CSV source passed to ``pandas.read_csv``.
            img_dir: directory the image file names are relative to.
            transform: optional callable applied to each loaded image.
            target_transform: optional callable applied to each label.
        """
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform, self.target_transform = transform, target_transform

    def __len__(self):
        """Return the number of annotation rows."""
        return self.img_labels.shape[0]

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair for row ``idx``."""
        file_name = self.img_labels.iloc[idx, 0]
        picture = read_image(os.path.join(self.img_dir, file_name))
        target = self.img_labels.iloc[idx, 1]
        if self.transform:
            picture = self.transform(picture)
        if self.target_transform:
            target = self.target_transform(target)
        return picture, target
367
+
368
+
369
+ # class MyDataSet(Dataset)
370
+ # class CustomDataset(Dataset):
371
class MyCustomDataset(Dataset):
    """Data set backed by a DataFrame with ``z0``, ``z1`` and ``labels`` columns.

    ``transform`` and ``target_transform`` are stored on the instance but
    are not applied by ``__getitem__``.
    """

    def __init__(self, df: pandas.DataFrame, transform=None, target_transform=None):
        """Keep a reference to the frame and the (unused) transforms."""
        self.df = df
        self.transform, self.target_transform = transform, target_transform

    def __len__(self):
        """Return the number of rows in the frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(z, label)`` for row ``idx``.

        ``z`` is a tensor built from the row's ``z0`` and ``z1`` values
        (converted to Python floats); ``label`` is a tensor built from the
        row's ``labels`` value.
        """
        coords = [float(self.df[column].iloc[idx]) for column in ('z0', 'z1')]
        target = self.df['labels'].iloc[idx]
        return torch.tensor(coords), torch.tensor(target)
386
+
387
+
388
+ # class MyDataSet(Dataset)
389
+
390
class CustomDatasetOld(Dataset):
    """Older variant: labels and ``z0``/``z1`` values live in separate frames.

    ``transform`` and ``target_transform`` are stored on the instance but
    are not applied by ``__getitem__``.
    """

    def __init__(self, labels, z01: pandas.DataFrame, transform=None, target_transform=None):
        """Store the inputs.

        Args:
            labels: frame whose column 1 holds the label of each sample.
            z01: frame with ``z0`` and ``z1`` columns, row-aligned with
                ``labels``.
            transform: stored, currently unused.
            target_transform: stored, currently unused.
        """
        self.labels = labels
        self.z01 = z01
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of label rows."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(z, label)`` for row ``idx``.

        ``z`` is a tensor holding the row's ``z0`` and ``z1`` values, as in
        ``MyCustomDataset``. The previous code added the scalar ``z0`` of
        the row to the *entire* ``z1`` column and so returned a whole
        Series per sample.
        NOTE(review): pairing (rather than summing) the two values is
        inferred from ``MyCustomDataset`` -- confirm against callers.
        """
        z = torch.tensor((float(self.z01['z0'].iloc[idx]), float(self.z01['z1'].iloc[idx])))
        label = self.labels.iloc[idx, 1]
        return z, label
409
+ # class MyDataSet(Dataset)