|
from configuration import DatasetName, DatasetType, W300Conf, InputDataSize, LearningConfig, WflwConf |
|
from image_utility import ImageUtility |
|
from sklearn.decomposition import PCA, IncrementalPCA |
|
from sklearn.decomposition import TruncatedSVD |
|
import numpy as np |
|
import pickle |
|
import os |
|
from tqdm import tqdm |
|
from numpy import save, load |
|
import math |
|
from PIL import Image |
|
from numpy import save, load |
|
|
|
|
|
class PCAUtility:
    """Compute, persist and reload PCA parameters of label vectors.

    The eigenvalues, eigenvectors and mean vector are stored as ``.npy``
    files inside ``pca_obj_dir``. Each file name is built from the dataset
    name, one of the ``*_prefix`` markers below and the PCA percentage.
    """

    eigenvalues_prefix = "_eigenvalues_"
    eigenvectors_prefix = "_eigenvectors_"
    meanvector_prefix = "_meanvector_"

    # Output folder, relative to the current working directory.
    pca_obj_dir = './pca_obj/'

    def _pca_obj_path(self, dataset_name, prefix, pca_percentages):
        """Return the on-disk path (including ``.npy``) of one PCA array."""
        # numpy.save appends ".npy" when it is missing, so the extension is
        # spelled out here to keep the save and load paths identical.
        return self.pca_obj_dir + dataset_name + prefix + str(pca_percentages) + '.npy'

    def _load_pca_array(self, dataset_name, prefix, pca_percentages):
        """Load one stored PCA array, tolerating extension-less legacy files."""
        path = self._pca_obj_path(dataset_name, prefix, pca_percentages)
        legacy_path = path[:-len('.npy')]
        if not os.path.isfile(path) and os.path.isfile(legacy_path):
            # File written through a file handle (no automatic ".npy" suffix).
            return np.load(legacy_path)
        return np.load(path)

    def create_pca_from_npy(self, dataset_name, labels_npy_path, pca_percentages):
        """
        Generate and save the eigenvalues, eigenvectors and mean vector.

        :param dataset_name: name used as the prefix of the saved file names.
        :param labels_npy_path: folder containing the normalized labels, one
            ``.npy`` file per sample. Other files in the folder are ignored.
        :param pca_percentages: percentage of variance to keep; it is divided
            by 100 and handed to scikit-learn's ``PCA`` as ``n_components``.
        :return: None. Three ``.npy`` files are written to ``pca_obj_dir``.
        :raises ValueError: if the folder holds no ``.npy`` file.
        """
        print('PCA calculation started: loading labels')

        # os.listdir returns entries in arbitrary order; sort so that repeated
        # runs stack the samples identically.
        lbl_arr = np.array([
            load(os.path.join(labels_npy_path, file))
            for file in tqdm(sorted(os.listdir(labels_npy_path)))
            if file.endswith(".npy")
        ])
        if lbl_arr.size == 0:
            raise ValueError('no .npy label files found in ' + str(labels_npy_path))

        # The projected samples are not needed here, only the model parameters.
        _, eigenvalues, eigenvectors = self._func_PCA(lbl_arr, pca_percentages)
        mean_lbl_arr = np.mean(lbl_arr, axis=0)
        # Stored transposed: one column per principal component.
        eigenvectors = eigenvectors.T

        # The output folder may not exist yet on a fresh checkout.
        os.makedirs(self.pca_obj_dir, exist_ok=True)
        save(self._pca_obj_path(dataset_name, self.eigenvalues_prefix, pca_percentages), eigenvalues)
        save(self._pca_obj_path(dataset_name, self.eigenvectors_prefix, pca_percentages), eigenvectors)
        save(self._pca_obj_path(dataset_name, self.meanvector_prefix, pca_percentages), mean_lbl_arr)

    def load_pca_obj(self, dataset_name, pca_percentages):
        """
        Load the arrays previously written by ``create_pca_from_npy``.

        :param dataset_name: dataset name used when the arrays were saved.
        :param pca_percentages: PCA percentage used when the arrays were saved.
        :return: tuple ``(eigenvalues, eigenvectors, meanvector)``.
        """
        eigenvalues = self._load_pca_array(dataset_name, self.eigenvalues_prefix, pca_percentages)
        eigenvectors = self._load_pca_array(dataset_name, self.eigenvectors_prefix, pca_percentages)
        meanvector = self._load_pca_array(dataset_name, self.meanvector_prefix, pca_percentages)
        return eigenvalues, eigenvectors, meanvector

    def _func_PCA(self, input_data, pca_postfix):
        """
        Fit a PCA on ``input_data`` and project the data onto it.

        :param input_data: array-like of samples, converted with ``np.array``.
        :param pca_postfix: percentage; ``pca_postfix / 100`` is passed to
            ``PCA`` as ``n_components``.
            NOTE(review): scikit-learn appears to reject a float of exactly
            1.0, so 100 is presumably not a valid value here - confirm.
        :return: tuple ``(projected_data, eigenvalues, eigenvectors)`` taken
            from ``transform``, ``explained_variance_`` and ``components_``.
        """
        input_data = np.array(input_data)
        pca = PCA(n_components=pca_postfix / 100)

        pca.fit(input_data)
        pca_input_data = pca.transform(input_data)
        eigenvalues = pca.explained_variance_
        eigenvectors = pca.components_
        return pca_input_data, eigenvalues, eigenvectors

    def __svd_func(self, input_data, pca_postfix):
        """
        Fit a truncated SVD with a fixed 50 components and project the data.

        :param input_data: samples passed to ``TruncatedSVD.fit``.
        :param pca_postfix: currently unused; kept for signature parity with
            ``_func_PCA``.
        :return: tuple ``(projected_data, eigenvalues, eigenvectors)`` taken
            from ``transform``, ``explained_variance_`` and ``components_``.
        """
        svd = TruncatedSVD(n_components=50)
        svd.fit(input_data)
        pca_input_data = svd.transform(input_data)
        eigenvalues = svd.explained_variance_
        eigenvectors = svd.components_
        return pca_input_data, eigenvalues, eigenvectors
|
|
|
|