import argparse
import datetime
import json
import os

import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.compat.v1.keras.backend import set_session

from facial_analysis import FacialImageProcessing


class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to native Python types."""

    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)
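
# Why the encoder is needed: NumPy scalars such as np.int64 are not JSON
# serialisable out of the box (the values below are illustrative):
#
#   json.dumps({"count": np.int64(3)})                 # raises TypeError
#   json.dumps({"count": np.int64(3)}, cls=NpEncoder)  # '{"count": 3}'
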
def initialize():
    """Configure a TF1-compat session so the GPU allocates memory on demand."""
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.compat.v1.Session(config=config)
    set_session(sess)
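
# A TF2-native alternative to the compat-session setup above (a sketch, not
# used by this script):
#
#   for gpu in tf.config.list_physical_devices('GPU'):
#       tf.config.experimental.set_memory_growth(gpu, True)
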
def mobilenet_preprocess_input(x, **kwargs):
    """Caffe-style preprocessing: subtract the per-channel ImageNet means.

    Note that x is modified in place and must be a float array.
    """
    x[..., 0] -= 103.939
    x[..., 1] -= 116.779
    x[..., 2] -= 123.68
    return x
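
# A quick sanity check of the mean subtraction (values are arbitrary):
#
#   x = np.full((1, 1, 3), 128.0)
#   mobilenet_preprocess_input(x)
#   # -> array([[[24.061, 11.221,  4.32 ]]])
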
def detect_emotion(frame_bgr, imgProcessing, model):
    """Detect faces in a BGR frame and classify the emotion of each face.

    Returns the annotated frame (BGR) and a dict with per-face predictions.
    """
    INPUT_SIZE = (224, 224)
    idx_to_class = {0: 'Anger', 1: 'Disgust', 2: 'Fear',
                    3: 'Happiness', 4: 'Neutral', 5: 'Sadness', 6: 'Surprise'}
    frame = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    bounding_boxes, points = imgProcessing.detect_faces(frame)
    points = points.T
    detections = {"id": str(datetime.datetime.now()), "faces": []}
    for bbox, _ in zip(bounding_boxes, points):
        face_pred = {}
        box = bbox.astype(int)  # np.int was removed in NumPy 1.24
        x1, y1, x2, y2 = box[0:4]
        face_img = frame[y1:y2, x1:x2, :]
        try:
            face_img = cv2.resize(face_img, INPUT_SIZE)
        except cv2.error:
            # An empty or degenerate crop cannot be resized; skip this face.
            continue
        inp = mobilenet_preprocess_input(face_img.astype(np.float32))
        inp = np.expand_dims(inp, axis=0)
        scores = model.predict(inp)[0]
        top = int(np.argmax(scores))
        frame = cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 9, 12), 4)
        cv2.putText(frame, '%s %.2f' % (idx_to_class[top], scores[top]),
                    (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
        face_pred["face_bbox"] = [x1, y1, x2, y2]
        face_pred["emotion_predicted"] = idx_to_class[top]
        face_pred["scores"] = {idx_to_class[i]: scores[i] for i in range(len(scores))}
        detections["faces"].append(face_pred)
    # Convert back so downstream OpenCV display/writing gets BGR again.
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    return frame, detections
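
# Minimal single-image sketch (the file names below are illustrative):
#
#   proc = FacialImageProcessing(False)
#   net = load_model('./models/affectnet_emotions/mobilenet_7.h5')
#   img = cv2.imread('face.jpg')  # OpenCV reads images as BGR
#   annotated, result = detect_emotion(img, proc, net)
#   cv2.imwrite('face_annotated.jpg', annotated)
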
def process_video(video):
    """Run emotion detection frame by frame and save the annotated video and JSON."""
    basename = os.path.basename(video)
    name_only = os.path.splitext(basename)[0]
    video_outputpath = os.path.join('./output', basename)
    json_outputpath = os.path.join('./output', name_only + '.json')
    # Load the face detector and emotion model once, not once per frame.
    imgProcessing = FacialImageProcessing(False)
    model = load_model('./models/affectnet_emotions/mobilenet_7.h5')
    videocap = cv2.VideoCapture(video)
    ret, frame = videocap.read()
    if not ret:
        raise IOError('Could not read video: %s' % video)
    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    fps = videocap.get(cv2.CAP_PROP_FPS) or 24.0  # fall back if FPS is unknown
    size = (frame.shape[1], frame.shape[0])
    out = cv2.VideoWriter(video_outputpath, fourcc, fps, size)
    max_frames = 50  # cap on processed frames so long videos finish quickly
    cnt = 0
    all_detections = []
    while ret and cnt < max_frames:
        processed_frame, detections = detect_emotion(frame, imgProcessing, model)
        all_detections.append(detections)
        cv2.imshow('img', processed_frame)
        out.write(processed_frame)
        ret, frame = videocap.read()
        cv2.waitKey(1)
        cnt += 1
    # Dump all per-frame detections once, so the file is a single valid JSON document.
    with open(json_outputpath, 'w') as jsonfile:
        json.dump(all_detections, jsonfile, indent=4, cls=NpEncoder)
    videocap.release()
    out.release()
    cv2.destroyAllWindows()
    return out


def main():
    parser = argparse.ArgumentParser(description='Analysis of Video')
    parser.add_argument(
        '-v', '--video', help='Video to be analysed', required=True)
    args = parser.parse_args()
    initialize()
    process_video(args.video)


if __name__ == '__main__':
    main()
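
# Typical invocation (the script filename is illustrative):
#
#   python emotion_video.py -v input.mp4
#
# The annotated video is written to ./output/input.mp4 and the per-frame
# detections to ./output/input.json; the ./output directory must already exist.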