# Ad-Corre: train.py
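"""Training script for the Ad-Corre facial expression recognition model.

Builds the CNN backbone, streams batches from a CustomDataset pipeline, and
optimizes the composite Ad-Corre loss (cross-entropy plus embedding similarity,
correlation, and mean-correlation terms) on AffectNet, RAF-DB, or FER2013.
"""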
import os
import tensorflow as tf
import numpy as np
from datetime import datetime
from AffectNetClass import AffectNet
from RafdbClass import RafDB
from FerPlusClass import FerPlus
from config import DatasetName, AffectnetConf, InputDataSize, LearningConfig, DatasetType, RafDBConf, FerPlusConf
from cnn_model import CNNModel
from custom_loss import CustomLosses
from data_helper import DataHelper
from dataset_class import CustomDataset
class TrainModel:
def __init__(self, dataset_name, ds_type, weights='imagenet', lr=1e-3, aug=True):
self.dataset_name = dataset_name
self.ds_type = ds_type
self.weights = weights
self.lr = lr
self.base_lr = 1e-5
self.max_lr = 5e-4
if dataset_name == DatasetName.fer2013:
self.drop = 0.1
self.epochs_drop = 5
if aug:
self.img_path = FerPlusConf.aug_train_img_path
self.annotation_path = FerPlusConf.aug_train_annotation_path
self.masked_img_path = FerPlusConf.aug_train_masked_img_path
else:
self.img_path = FerPlusConf.no_aug_train_img_path
self.annotation_path = FerPlusConf.no_aug_train_annotation_path
self.val_img_path = FerPlusConf.test_img_path
self.val_annotation_path = FerPlusConf.test_annotation_path
self.eval_masked_img_path = FerPlusConf.test_masked_img_path
self.num_of_classes = 7
self.num_of_samples = None
elif dataset_name == DatasetName.rafdb:
self.drop = 0.1
self.epochs_drop = 5
if aug:
self.img_path = RafDBConf.aug_train_img_path
self.annotation_path = RafDBConf.aug_train_annotation_path
self.masked_img_path = RafDBConf.aug_train_masked_img_path
else:
self.img_path = RafDBConf.no_aug_train_img_path
self.annotation_path = RafDBConf.no_aug_train_annotation_path
self.val_img_path = RafDBConf.test_img_path
self.val_annotation_path = RafDBConf.test_annotation_path
self.eval_masked_img_path = RafDBConf.test_masked_img_path
self.num_of_classes = 7
self.num_of_samples = None
elif dataset_name == DatasetName.affectnet:
self.drop = 0.1
self.epochs_drop = 5
if ds_type == DatasetType.train:
self.img_path = AffectnetConf.aug_train_img_path
self.annotation_path = AffectnetConf.aug_train_annotation_path
self.masked_img_path = AffectnetConf.aug_train_masked_img_path
self.val_img_path = AffectnetConf.eval_img_path
self.val_annotation_path = AffectnetConf.eval_annotation_path
self.eval_masked_img_path = AffectnetConf.eval_masked_img_path
self.num_of_classes = 8
self.num_of_samples = AffectnetConf.num_of_samples_train
elif ds_type == DatasetType.train_7:
if aug:
self.img_path = AffectnetConf.aug_train_img_path_7
self.annotation_path = AffectnetConf.aug_train_annotation_path_7
self.masked_img_path = AffectnetConf.aug_train_masked_img_path_7
else:
self.img_path = AffectnetConf.no_aug_train_img_path_7
self.annotation_path = AffectnetConf.no_aug_train_annotation_path_7
self.val_img_path = AffectnetConf.eval_img_path_7
self.val_annotation_path = AffectnetConf.eval_annotation_path_7
self.eval_masked_img_path = AffectnetConf.eval_masked_img_path_7
self.num_of_classes = 7
self.num_of_samples = AffectnetConf.num_of_samples_train_7
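    # NOTE: base_lr/max_lr and drop/epochs_drop are set above, but no
    # learning-rate scheduler is wired into train(). A minimal step-decay
    # sketch that would use them (hypothetical helper; the name and the
    # clamping to [base_lr, max_lr] are assumptions, not original code):
    def _step_decay_lr(self, epoch):
        """Step-decay learning rate clamped to [base_lr, max_lr].
        Hypothetical; not called anywhere in this file."""
        lr = self.lr * (self.drop ** (epoch // self.epochs_drop))
        return float(np.clip(lr, self.base_lr, self.max_lr))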
    def train(self, arch, weight_path):
        """Train `arch` on the configured dataset; after each epoch, evaluate
        and save the model weights together with the confusion matrix."""
'''create loss'''
c_loss = CustomLosses()
'''create summary writer'''
summary_writer = tf.summary.create_file_writer(
"./train_logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S"))
start_train_date = datetime.now().strftime("%Y%m%d-%H%M%S")
'''making models'''
model = self.make_model(arch=arch, w_path=weight_path)
'''create save path'''
if self.dataset_name == DatasetName.affectnet:
save_path = AffectnetConf.weight_save_path + start_train_date + '/'
elif self.dataset_name == DatasetName.rafdb:
save_path = RafDBConf.weight_save_path + start_train_date + '/'
        elif self.dataset_name == DatasetName.fer2013:
            save_path = FerPlusConf.weight_save_path + start_train_date + '/'
        else:
            raise ValueError('Unsupported dataset: ' + str(self.dataset_name))
if not os.path.exists(save_path):
os.makedirs(save_path)
'''create sample generator'''
dhp = DataHelper()
''' Train Generator'''
img_filenames, exp_filenames = dhp.create_generator_full_path(img_path=self.img_path,
annotation_path=self.annotation_path)
'''create dataset'''
cds = CustomDataset()
ds = cds.create_dataset(img_filenames=img_filenames,
anno_names=exp_filenames,
is_validation=False)
'''create train configuration'''
step_per_epoch = len(img_filenames) // LearningConfig.batch_size
gradients = None
virtual_step_per_epoch = LearningConfig.virtual_batch_size // LearningConfig.batch_size
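        # NOTE: `gradients` and `virtual_step_per_epoch` appear to be leftovers
        # from a gradient-accumulation (virtual batch) setup; neither is used below.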
'''create optimizer'''
optimizer = tf.keras.optimizers.Adam(self.lr, decay=1e-5)
'''start train:'''
all_gt_exp = []
all_pr_exp = []
for epoch in range(LearningConfig.epochs):
ce_weight = 2
batch_index = 0
for img_batch, exp_batch in ds:
                '''since computing the confusion matrix is time-consuming, we only
                keep the most recent labels (capped at LearningConfig.labels_history_frame);
                this also lets the matrix adapt more quickly as predictions change.
                '''
all_gt_exp, all_pr_exp = self._update_all_labels_arrays(all_gt_exp, all_pr_exp)
                '''select the last element along axis 1, squeezing the extra
                axis added by the dataset pipeline'''
                exp_batch = exp_batch[:, -1]
                img_batch = img_batch[:, -1, :, :]
'''train step'''
step_gradients, all_gt_exp, all_pr_exp = self.train_step(epoch=epoch, step=batch_index,
total_steps=step_per_epoch,
img_batch=img_batch,
anno_exp=exp_batch,
model=model, optimizer=optimizer,
c_loss=c_loss,
ce_weight=ce_weight,
summary_writer=summary_writer,
all_gt_exp=all_gt_exp,
all_pr_exp=all_pr_exp)
batch_index += 1
'''evaluating part'''
global_accuracy, conf_mat, avg_acc = self._eval_model(model=model)
'''save weights'''
save_name = save_path + '_' + str(epoch) + '_' + self.dataset_name + '_AC_' + str(global_accuracy)
model.save(save_name + '.h5')
self._save_confusion_matrix(conf_mat, save_name + '.txt')
def train_step(self, epoch, step, total_steps, model, ce_weight,
img_batch, anno_exp, optimizer, summary_writer, c_loss, all_gt_exp, all_pr_exp):
with tf.GradientTape() as tape:
pr_data = model([img_batch], training=True)
exp_pr_vec = pr_data[0]
embeddings = pr_data[1:]
loss_exp, accuracy = c_loss.cross_entropy_loss(y_pr=exp_pr_vec, y_gt=anno_exp,
num_classes=self.num_of_classes,
ds_name=self.dataset_name)
'''Feature difference loss'''
# embedding_similarity_loss = 0
embedding_similarity_loss = c_loss.embedding_loss_distance(embeddings=embeddings)
'''update confusion matrix'''
            exp_pr = tf.argmax(exp_pr_vec, axis=1, output_type=tf.dtypes.int64)
            tr_conf_matrix, all_gt_exp, all_pr_exp = c_loss.update_confusion_matrix(anno_exp,  # ground-truth labels
                                                                                    exp_pr,  # predicted labels
                                                                                    all_gt_exp,
                                                                                    all_pr_exp)
''' correlation between the embeddings'''
correlation_loss = c_loss.correlation_loss_multi(embeddings=embeddings,
exp_gt_vec=anno_exp,
exp_pr_vec=exp_pr_vec,
tr_conf_matrix=tr_conf_matrix)
'''mean loss'''
mean_correlation_loss = c_loss.mean_embedding_loss_distance(embeddings=embeddings,
exp_gt_vec=anno_exp,
exp_pr_vec=exp_pr_vec,
num_of_classes=self.num_of_classes)
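            # Ad-Corre total objective: weighted cross-entropy plus the three
            # embedding-level terms (similarity, correlation, mean correlation).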
            lambda_param = 50
            loss_total = lambda_param * loss_exp + \
                         embedding_similarity_loss + \
                         correlation_loss + \
                         mean_correlation_loss
# '''calculate gradient'''
gradients_of_model = tape.gradient(loss_total, model.trainable_variables)
# '''apply Gradients:'''
optimizer.apply_gradients(zip(gradients_of_model, model.trainable_variables))
# '''printing loss Values: '''
tf.print("->EPOCH: ", str(epoch), "->STEP: ", str(step) + '/' + str(total_steps),
' -> : accuracy: ', accuracy,
' -> : loss_total: ', loss_total,
' -> : loss_exp: ', loss_exp,
' -> : embedding_similarity_loss: ', embedding_similarity_loss,
' -> : correlation_loss: ', correlation_loss,
' -> : mean_correlation_loss: ', mean_correlation_loss)
with summary_writer.as_default():
tf.summary.scalar('loss_total', loss_total, step=epoch)
tf.summary.scalar('loss_exp', loss_exp, step=epoch)
tf.summary.scalar('correlation_loss', correlation_loss, step=epoch)
tf.summary.scalar('mean_correlation_loss', mean_correlation_loss, step=epoch)
tf.summary.scalar('embedding_similarity_loss', embedding_similarity_loss, step=epoch)
return gradients_of_model, all_gt_exp, all_pr_exp
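    # NOTE: train_step_old appears to be an earlier variant of the training
    # step (separate class/mean/variance embedding losses); it is kept for
    # reference and is not called by train().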
def train_step_old(self, epoch, step, total_steps, model, ce_weight,
img_batch, anno_exp, optimizer, summary_writer, c_loss, all_gt_exp, all_pr_exp):
with tf.GradientTape() as tape:
# '''create annotation_predicted'''
# exp_pr, embedding = model([img_batch], training=True)
exp_pr_vec, embedding_class, embedding_mean, embedding_var = model([img_batch], training=True)
bs_size = tf.shape(exp_pr_vec, out_type=tf.dtypes.int64)[0]
# # '''CE loss'''
loss_exp, accuracy = c_loss.cross_entropy_loss(y_pr=exp_pr_vec, y_gt=anno_exp,
num_classes=self.num_of_classes,
ds_name=self.dataset_name)
#
loss_cls_mean, loss_cls_var, loss_mean_var = c_loss.embedding_loss_distance(
embedding_class=embedding_class,
embedding_mean=embedding_mean,
embedding_var=embedding_var,
bs_size=bs_size)
feature_diff_loss = loss_cls_mean + loss_cls_var + loss_mean_var
# correlation between the class_embeddings
cor_loss, all_gt_exp, all_pr_exp = c_loss.correlation_loss(embedding=embedding_class, # distribution
exp_gt_vec=anno_exp,
exp_pr_vec=exp_pr_vec,
num_of_classes=self.num_of_classes,
all_gt_exp=all_gt_exp,
all_pr_exp=all_pr_exp)
# correlation between the mean_emb_cor_loss
mean_emb_cor_loss, mean_emb_kl_loss = c_loss.mean_embedding_loss(embedding=embedding_mean,
exp_gt_vec=anno_exp,
exp_pr_vec=exp_pr_vec,
num_of_classes=self.num_of_classes)
mean_loss = mean_emb_cor_loss + 10 * mean_emb_kl_loss
var_emb_cor_loss, var_emb_kl_loss = c_loss.variance_embedding_loss(embedding=embedding_var,
exp_gt_vec=anno_exp,
exp_pr_vec=exp_pr_vec,
num_of_classes=self.num_of_classes)
var_loss = var_emb_cor_loss + 10 * var_emb_kl_loss
# '''total:'''
loss_total = 100 * loss_exp + cor_loss + 10 * feature_diff_loss + mean_loss + var_loss
# '''calculate gradient'''
gradients_of_model = tape.gradient(loss_total, model.trainable_variables)
# '''apply Gradients:'''
optimizer.apply_gradients(zip(gradients_of_model, model.trainable_variables))
# '''printing loss Values: '''
tf.print("->EPOCH: ", str(epoch), "->STEP: ", str(step) + '/' + str(total_steps),
' -> : accuracy: ', accuracy,
' -> : loss_total: ', loss_total,
' -> : loss_exp: ', loss_exp,
' -> : cor_loss: ', cor_loss,
' -> : feature_loss: ', feature_diff_loss,
' -> : mean_loss: ', mean_loss,
' -> : var_loss: ', var_loss)
with summary_writer.as_default():
tf.summary.scalar('loss_total', loss_total, step=epoch)
tf.summary.scalar('loss_exp', loss_exp, step=epoch)
tf.summary.scalar('loss_correlation', cor_loss, step=epoch)
return gradients_of_model, all_gt_exp, all_pr_exp
    def _eval_model(self, model):
        """Evaluate `model` on the evaluation/test split of the current dataset.

        Returns the global accuracy, the confusion matrix, and the average
        per-class accuracy; for AffectNet we need the accuracy of each label
        to compute the total average accuracy.
        """
global_accuracy = 0
avg_acc = 0
conf_mat = []
if self.dataset_name == DatasetName.affectnet:
if self.ds_type == DatasetType.train:
affn = AffectNet(ds_type=DatasetType.eval)
else:
affn = AffectNet(ds_type=DatasetType.eval_7)
global_accuracy, conf_mat, avg_acc, precision, recall, fscore, support = \
affn.test_accuracy(model=model)
elif self.dataset_name == DatasetName.rafdb:
rafdb = RafDB(ds_type=DatasetType.test)
global_accuracy, conf_mat, avg_acc, precision, recall, fscore, support = rafdb.test_accuracy(model=model)
elif self.dataset_name == DatasetName.fer2013:
ferplus = FerPlus(ds_type=DatasetType.test)
global_accuracy, conf_mat, avg_acc, precision, recall, fscore, support = ferplus.test_accuracy(model=model)
print("================== global_accuracy =====================")
print(global_accuracy)
print("================== Average Accuracy =====================")
print(avg_acc)
print("================== Confusion Matrix =====================")
print(conf_mat)
return global_accuracy, conf_mat, avg_acc
def make_model(self, arch, w_path):
cnn = CNNModel()
model = cnn.get_model(arch=arch, num_of_classes=LearningConfig.num_classes, weights=self.weights)
if w_path is not None:
model.load_weights(w_path)
return model
    def _save_confusion_matrix(self, conf_mat, save_name):
        print(save_name)
        with open(save_name, "a") as f:
            f.write(np.array_str(conf_mat))
def _update_all_labels_arrays(self, all_gt_exp, all_pr_exp):
if len(all_gt_exp) < LearningConfig.labels_history_frame:
return all_gt_exp, all_pr_exp
else: # remove the first batch:
return all_gt_exp[LearningConfig.batch_size:], all_pr_exp[LearningConfig.batch_size:]
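if __name__ == '__main__':
    # Minimal usage sketch. The architecture name 'xcp' and the
    # hyper-parameters below are assumptions for illustration; adapt
    # them to your config and pass a weight_path to resume training.
    trainer = TrainModel(dataset_name=DatasetName.rafdb,
                         ds_type=DatasetType.train,
                         weights='imagenet',
                         lr=1e-3,
                         aug=True)
    trainer.train(arch='xcp', weight_path=None)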