Nikhil0987 committed on
Commit
1c7de53
0 Parent(s):
Files changed (1) hide show
  1. datamanager.py +91 -0
datamanager.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import math
4
+ import tarfile
5
+
6
+
7
class DataManager:
    """Extract a speech-dataset archive and split it into train/test folders.

    The archive is unpacked into a directory named after the archive file
    (extension removed).  Extracted files are grouped by the speaker id that
    precedes the first ``_`` in their name, and for every speaker the first
    two thirds of the files (rounded down) are moved to
    ``TrainingData/<gender>`` while the remainder goes to
    ``TestingData/<gender>``.  Both target trees are created relative to the
    current working directory.
    """

    # Speaker ids expected as file-name prefixes: five female, five male.
    SPEAKER_IDS = (
        "f0001", "f0002", "f0003", "f0004", "f0005",
        "m0001", "m0002", "m0003", "m0004", "m0005",
    )

    def __init__(self, dataset_path):
        """Store the path of the compressed dataset archive."""
        self.dataset_path = dataset_path

    @staticmethod
    def _dataset_directory(archive_path):
        """Return the extraction directory for *archive_path*.

        Only the file name loses its extension(s); dots that belong to parent
        directories (``./SLR45.tgz``, ``data.v2/SLR45.tgz``) are preserved, so
        the result is never truncated to an empty or unrelated path.
        """
        parent, file_name = os.path.split(archive_path)
        return os.path.join(parent, file_name.split(".")[0])

    def extract_dataset(self, compressed_dataset_file_name, dataset_directory):
        """Unpack a gzip-compressed tar archive into *dataset_directory*.

        Extraction is best effort: any failure is reported on stdout together
        with its cause instead of being raised.
        """
        try:
            # The context manager closes the archive even when extraction fails.
            with tarfile.open(compressed_dataset_file_name, "r:gz") as tar:
                if hasattr(tarfile, "data_filter"):
                    # Where extraction filters are available, refuse members
                    # that would be written outside dataset_directory.
                    tar.extractall(dataset_directory, filter="data")
                else:
                    tar.extractall(dataset_directory)
            print("Files extraction was successfull ...")
        except Exception as error:
            # Exception (not a bare except) keeps Ctrl-C / SystemExit working.
            print("Ecxception raised: No extraction was done ...", error)

    def make_folder(self, folder_path):
        """Create the directory *folder_path* and report the outcome on stdout.

        An already existing folder or any other OS-level failure is reported
        rather than raised.
        """
        try:
            os.mkdir(folder_path)
        except OSError as error:
            print("Ecxception raised: ", folder_path, "could not be created ...", error)
        else:
            print(folder_path, "was created ...")

    def move_files(self, src, dst, group):
        """Move every file named in *group* from directory *src* to *dst*."""
        for fname in group:
            os.rename(os.path.join(src, fname), os.path.join(dst, fname))

    def get_fnames_from_dict(self, dataset_dict, f_or_m):
        """Split each speaker's files into a training and a testing portion.

        Args:
            dataset_dict: mapping of speaker id (e.g. ``"f0001"``) to the list
                of file names belonging to that speaker.
            f_or_m: gender letter of the speaker ids to process, ``"f"`` or
                ``"m"``.

        Returns:
            A ``(training_data, testing_data)`` tuple of flat lists.  For each
            speaker the first two thirds of the files (rounded down) are
            training data and the rest are testing data.
        """
        training_data, testing_data = [], []
        speaker_prefix = f_or_m + "000"

        # Every speaker of the requested gender is processed, including
        # speaker 0005; sorting keeps the output order stable.
        for speaker in sorted(dataset_dict):
            if not speaker.startswith(speaker_prefix):
                continue
            fnames = dataset_dict[speaker]
            length_separator = len(fnames) * 2 // 3

            training_data += fnames[:length_separator]
            testing_data += fnames[length_separator:]

        return training_data, testing_data

    def manage(self):
        """Extract the archive and distribute its files into train/test folders."""
        compressed_dataset_file_name = self.dataset_path
        dataset_directory = self._dataset_directory(compressed_dataset_file_name)

        # An existing directory is fine; genuine failures are no longer hidden.
        os.makedirs(dataset_directory, exist_ok=True)

        self.extract_dataset(compressed_dataset_file_name, dataset_directory)

        # Group file names by speaker.  Files whose prefix is not a known
        # speaker id are skipped, and the listing is sorted so that the
        # train/test split does not depend on directory enumeration order.
        dataset_dict = {speaker: [] for speaker in self.SPEAKER_IDS}
        for fname in sorted(os.listdir(dataset_directory)):
            speaker = fname.split("_")[0]
            if speaker in dataset_dict:
                dataset_dict[speaker].append(fname)

        # Divide the file names of each gender into training and testing sets.
        training_set, testing_set = {}, {}
        for gender, letter in (("females", "f"), ("males", "m")):
            training_set[gender], testing_set[gender] = self.get_fnames_from_dict(
                dataset_dict, letter
            )

        # Parent folders first: make_folder does not create intermediate levels.
        for folder in (
            "TrainingData",
            "TestingData",
            "TrainingData/females",
            "TrainingData/males",
            "TestingData/females",
            "TestingData/males",
        ):
            self.make_folder(folder)

        for target_root, file_sets in (
            ("TrainingData", training_set),
            ("TestingData", testing_set),
        ):
            for gender in ("females", "males"):
                self.move_files(
                    dataset_directory, target_root + "/" + gender, file_sets[gender]
                )
87
+
88
+
89
if __name__ == "__main__":
    # Script entry point: process the SLR45 archive, resolved relative to the
    # current working directory.
    DataManager("SLR45.tgz").manage()