abrar-adnan committed
Commit
8ced839
1 Parent(s): 197af76

added emotion analysis

Files changed (2)
  1. app.py +9 -1
  2. optimized.py +97 -0
app.py CHANGED
@@ -8,7 +8,7 @@ import base64
  from deepface import DeepFace
  import torchaudio
  import moviepy.editor as mp
- from transformers import WhisperProcessor, WhisperForConditionalGeneration
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
 
  # import pathlib
  # temp = pathlib.PosixPath
@@ -23,6 +23,8 @@ backends = [
      'mediapipe'
  ]
 
+ emotion_pipeline = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-emotion")
+
  def getTranscription(path):
      # Insert Local Video File Path
      clip = mp.VideoFileClip(path)
@@ -51,6 +53,10 @@ def getTranscription(path):
 
  model = load_learner("gaze-recognizer-v3.pkl")
 
+ def analyze_emotion(text):
+     result = emotion_pipeline(text)
+     return result
+
  def video_processing(video_file, encoded_video):
      angry = 0
      disgust = 0
@@ -74,6 +80,8 @@ def video_processing(video_file, encoded_video):
 
      transcription = getTranscription(video_file)
      print(transcription)
+     text_emotion = analyze_emotion(transcription)
+     print(text_emotion)
 
      video_capture = cv2.VideoCapture(video_file)
      on_camera = 0
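
For reference, cardiffnlp/twitter-roberta-base-emotion is a four-label classifier (anger, joy, optimism, sadness), and a transformers text-classification pipeline returns one {label, score} dict per input string, so analyze_emotion(transcription) yields a small list rather than a single value. A minimal sketch of the expected shape, using a made-up transcript (the scores are illustrative, and the label text depends on the checkpoint's id2label mapping):

from transformers import pipeline

# Same pipeline object app.py now creates at module level
emotion_pipeline = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-emotion")

# Stand-in for the Whisper transcript returned by getTranscription()
sample_transcription = "I am really happy with how this demo turned out."

# Prints something like: [{'label': 'joy', 'score': 0.98}]
print(emotion_pipeline(sample_transcription))
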
optimized.py ADDED
@@ -0,0 +1,97 @@
+ import base64
+ import cv2
+ import face_recognition
+ import gradio as gr
+ import moviepy.editor as mp
+ import os
+ import time
+ import torchaudio
+ from fastai.vision.all import load_learner
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
+
+ emotion_pipeline = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-emotion")
+
+ model = load_learner("gaze-recognizer-v3.pkl")
+
+ def extract_audio(video_path):
+     clip = mp.VideoFileClip(video_path)
+     clip.audio.write_audiofile("audio.wav")
+
+ def get_transcription(path):
+     extract_audio(path)
+
+     waveform, sample_rate = torchaudio.load("audio.wav")
+     resampler = torchaudio.transforms.Resample(sample_rate, 16000)
+     waveform = resampler(waveform)[0]
+
+     processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
+     model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
+     model.config.forced_decoder_ids = None
+
+     input_features = processor(waveform.squeeze(dim=0), return_tensors="pt").input_features
+     predicted_ids = model.generate(input_features)
+
+     transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
+     return transcription[0]
+
+ def analyze_emotion(text):
+     result = emotion_pipeline(text)
+     return result
+
+ def process_frame(frame):
+     gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+     face_locations = face_recognition.face_locations(gray)
+
+     if len(face_locations) > 0:
+         for top, right, bottom, left in face_locations:
+             face_image = gray[top:bottom, left:right]
+             resized_face_image = cv2.resize(face_image, (128, 128))
+             result = model.predict(resized_face_image)
+
+         return result[0]
+
+     return None
+
+ def video_processing(video_file, encoded_video):
+     if encoded_video != "":
+         decoded_file_data = base64.b64decode(encoded_video)
+         with open("temp_video.mp4", "wb") as f:
+             f.write(decoded_file_data)
+         video_file = "temp_video.mp4"
+
+     transcription = get_transcription(video_file)
+     print(transcription)
+
+     video_capture = cv2.VideoCapture(video_file)
+     on_camera = 0
+     off_camera = 0
+     total = 0
+     emotions = []
+
+     while True:
+         for _ in range(24 * 3):
+             ret, frame = video_capture.read()
+             if not ret:
+                 break
+
+         if not ret:
+             break
+
+         result = process_frame(frame)
+         if result:
+             if result == 'on_camera':
+                 on_camera += 1
+             elif result == 'off_camera':
+                 off_camera += 1
+             total += 1
+
+         emotion_results = analyze_emotion(transcription)
+         emotions.append(emotion_results)
+
+     video_capture.release()
+     cv2.destroyAllWindows()
+
+     if os.path.exists("temp_video.mp4"):
+         os.remove("temp_video.mp4")
+
+     gaze_percentage = on_camera / total * 100 if total > 0 else 0
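
optimized.py imports gradio, but the 97 lines shown stop before any interface is wired up and before video_processing returns a value. A hypothetical sketch, not part of this commit, of how the entry point might be exposed once it returns its results (the output component and the returned dict are assumptions):

import gradio as gr

# Assumes video_processing is extended to return a dict such as
# {"transcription": ..., "gaze_percentage": ..., "emotions": ...}
demo = gr.Interface(
    fn=video_processing,
    inputs=[gr.Video(label="Video file"), gr.Textbox(label="Base64-encoded video (optional)")],
    outputs=gr.JSON(label="Analysis"),
)

if __name__ == "__main__":
    demo.launch()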