RainPoo committed
Commit 1ff6afc · verified · 1 Parent(s): 50406c0

Upload 41 files

Files changed (41)
  1. src/.DS_Store +0 -0
  2. src/.gradio/certificate.pem +31 -0
  3. src/app.py +432 -0
  4. src/archive/__pycache__/main_test.cpython-312-pytest-8.3.4.pyc +0 -0
  5. src/archive/__pycache__/sample_inputs.cpython-312.pyc +0 -0
  6. src/archive/main_test.py +97 -0
  7. src/archive/sample_inputs.py +98 -0
  8. src/configs/database/.DS_Store +0 -0
  9. src/configs/database/__pycache__/firebase.cpython-312.pyc +0 -0
  10. src/configs/database/firebase.py +175 -0
  11. src/configs/llm/nvidia-llama-3.1-nemotron-70b-instruct.yaml +4 -0
  12. src/configs/llm/openai-gpt-3.5-turbo.yaml +4 -0
  13. src/configs/llm/openai-gpt-4o-mini.yaml +4 -0
  14. src/configs/parser/llamaparse_en.yaml +7 -0
  15. src/domain/candidate.py +13 -0
  16. src/domain/enums/__pycache__/emotion_types.cpython-312.pyc +0 -0
  17. src/domain/enums/emotion_types.py +21 -0
  18. src/domain/enums/interview_status.py +11 -0
  19. src/domain/interview.py +28 -0
  20. src/llm/__pycache__/base_llm_provider.cpython-312.pyc +0 -0
  21. src/llm/__pycache__/enums.cpython-312.pyc +0 -0
  22. src/llm/__pycache__/llm.cpython-312.pyc +0 -0
  23. src/llm/__pycache__/nvidia_llm.cpython-312.pyc +0 -0
  24. src/llm/__pycache__/openai_llm.cpython-312.pyc +0 -0
  25. src/llm/base_llm_provider.py +16 -0
  26. src/llm/enums.py +3 -0
  27. src/llm/llm.py +32 -0
  28. src/llm/nvidia_llm.py +29 -0
  29. src/llm/openai_llm.py +29 -0
  30. src/output/.DS_Store +0 -0
  31. src/output/report.docx +0 -0
  32. src/service/__pycache__/emotion_recognition.cpython-312.pyc +0 -0
  33. src/service/__pycache__/resume_parser.cpython-312.pyc +0 -0
  34. src/service/emotion_recognition.py +136 -0
  35. src/service/resume_parser.py +42 -0
  36. src/template/__pycache__/grading_prompt.cpython-312.pyc +0 -0
  37. src/template/__pycache__/parser_prompt.cpython-312.pyc +0 -0
  38. src/template/grading_prompt.py +111 -0
  39. src/template/parser_prompt.py +21 -0
  40. src/utils/__pycache__/utils.cpython-312.pyc +0 -0
  41. src/utils/utils.py +103 -0
src/.DS_Store ADDED
Binary file (6.15 kB).
 
src/.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
+ -----BEGIN CERTIFICATE-----
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+ -----END CERTIFICATE-----
src/app.py ADDED
@@ -0,0 +1,432 @@
+ import gradio as gr
+ import pandas as pd
+ import logging
+ from pathlib import Path
+ from docx import Document
+ from typing import Optional, List
+ from dataclasses import dataclass
+ from dotenv import load_dotenv
+ from src.archive.sample_inputs import INTERVIEW_QUESTION, JOB_REQUIREMENTS
+ from src.configs.database.firebase import write_user_data, read_all_users
+ from src.llm.llm import get_llm
+ from src.service.emotion_recognition import EmotionRecognition
+ from src.service.resume_parser import ResumeParser
+ from src.utils.utils import (
+     parse_yaml_string,
+     extract_audio,
+     audio2text,
+     sample_frames,
+ )
+ from src.template.grading_prompt import (
+     GRADE_RESPONSE_PROMPT,
+     RANKING_AND_FEEDBACK_PROMPT,
+ )
+
+ load_dotenv()
+ # ENVIRONMENT = os.getenv("ENVIRONMENT", "local")
+
+ # Define base paths dynamically
+ # if ENVIRONMENT == "local":
+ #     BASE_DIR = Path(__file__).resolve().parent.parent.parent.parent
+ # else:  # Assume hosted on Hugging Face Spaces
+ BASE_DIR = Path(".").resolve()
+
+ LLM_CONFIG_FILE = BASE_DIR / "configs/llm/openai-gpt-3.5-turbo.yaml"
+ RESUME_PARSER_CONFIG_FILE = BASE_DIR / "configs/parser/llamaparse_en.yaml"
+ OUTPUT_AUDIO_FILE_EMPTY = BASE_DIR / "output/audio_output.wav"
+ OUTPUT_REPORT_FILE_EMPTY = BASE_DIR / "output/report.docx"
+
+
+ @dataclass
+ class ProcessingResult:
+     candidate_name: Optional[str] = None
+     candidate_score: Optional[int] = None
+     candidate_feedbacks: Optional[List[str]] = None
+     feedback_md: Optional[str] = None
+     interview_question: Optional[str] = None
+     job_requirements: Optional[str] = None
+     error_message: Optional[str] = None
+
+
+ class GradioInterface:
+     VALID_VIDEO_EXTENSIONS = {".mp4", ".avi", ".mkv"}
+     VALID_RESUME_EXTENSIONS = {".pdf"}
+
+     def __init__(self):
+         self.parser = None
+         self.llm = None
+         self.logger = None
+         self.candidate_feedback = pd.DataFrame(columns=["Name", "Score", "Feedback"])
+         self.setup_logging()
+         self.initialize_services()
+
+     def setup_logging(self):
+         logging.basicConfig(
+             level=logging.INFO,
+             format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+         )
+         self.logger = logging.getLogger(__name__)
+
+     def initialize_services(self):
+         try:
+             self.llm = get_llm(str(LLM_CONFIG_FILE))
+             self.parser = ResumeParser(str(RESUME_PARSER_CONFIG_FILE))
+         except Exception as e:
+             self.logger.error(f"Failed to initialize services: {str(e)}")
+             raise
+
+     def validate_inputs(
+         self,
+         video_path: Optional[str],
+         resume_path: Optional[str],
+         interview_questions: Optional[str],
+         job_requirements: Optional[str],
+     ) -> Optional[str]:
+         if not video_path:
+             return "Please upload an interview video."
+         if not resume_path:
+             return "Please upload a resume (PDF)."
+         if not interview_questions:
+             return "Please provide interview questions."
+         if not job_requirements:
+             return "Please provide job requirements."
+         if not self._validate_file_format(video_path, self.VALID_VIDEO_EXTENSIONS):
+             return "Invalid video format."
+         if not self._validate_file_format(resume_path, self.VALID_RESUME_EXTENSIONS):
+             return "Please submit resume in PDF format."
+         return None
+
+     def _validate_file_format(self, file_path: str, valid_extensions: set) -> bool:
+         return isinstance(file_path, str) and any(
+             file_path.lower().endswith(ext) for ext in valid_extensions
+         )
+
+     def process_video(self, video_path: str) -> Optional[str]:
+         OUTPUT_AUDIO_FILE = extract_audio(video_path, str(OUTPUT_AUDIO_FILE_EMPTY))
+         audio_text = audio2text(OUTPUT_AUDIO_FILE)
+         return audio_text
+
+     def analyze_emotions(self, video_path: str) -> Optional[str]:
+         frames = sample_frames(video_path, sample_rate=8)
+         emotions = EmotionRecognition.detect_face_emotions(frames)
+         emotions_dict = EmotionRecognition.process_emotions(emotions)
+         conf_score = emotions_dict["conf"]
+         return conf_score
+
+     def process_resume(self, resume_path: str) -> Optional[str]:
+         resume_md = self.parser.parse_resume_to_markdown(resume_path)
+         return resume_md
+
+     def format_feedback_to_markdown(self, feedback_df: pd.DataFrame) -> str:
+         if feedback_df.empty:
+             return "No feedback available."
+
+         name = feedback_df["Name"].iloc[0]
+         score = feedback_df["Score"].iloc[0]
+
+         # Start with header
+         markdown_text = f"""
+ # Candidate Assessment Report 📝
+
+ ## Candidate Name ✨
+ {name}
+
+ ## Candidate Overall Score 🎯
+ {score}/100
+
+ ## Detailed Feedback 🛠️
+ """
+
+         for idx, row in feedback_df.iterrows():
+             markdown_text += f"- {row['Feedback']}\n\n"
+
+         return markdown_text
+
+     def get_feedback(
+         self,
+         itv_question: str,
+         job_requirements: str,
+         conf_score: str,
+         audio_text: str,
+         resume_md: str,
+     ) -> pd.DataFrame:
+
+         formatted_grading_prompt = GRADE_RESPONSE_PROMPT.format(
+             interview_question=itv_question,
+             conf_score=conf_score,
+             response_text=audio_text,
+         )
+
+         grade = self.llm.complete(formatted_grading_prompt)
+
+         formatted_ranking_prompt = RANKING_AND_FEEDBACK_PROMPT.format(
+             job_requirements=job_requirements,
+             interview_feedback=grade,
+             resume_text=resume_md,
+         )
+         rank_and_feedback = self.llm.complete(formatted_ranking_prompt)
+
+         expected_keys = ["name", "score", "feedback"]
+         rank_and_feedback_dict = parse_yaml_string(
+             yaml_string=rank_and_feedback, expected_keys=expected_keys, cleanup=True
+         )
+
+         return pd.DataFrame(
+             {
+                 "Name": rank_and_feedback_dict["name"],
+                 "Score": rank_and_feedback_dict["score"],
+                 "Feedback": rank_and_feedback_dict["feedback"],
+             }
+         )
+
+     def process_submission(
+         self,
+         video_path: str,
+         resume_path: str,
+         interview_questions: str,
+         job_title: str,
+         job_requirements: str,
+     ) -> ProcessingResult:
+         try:
+             # Validate inputs
+             error_message = self.validate_inputs(
+                 video_path, resume_path, interview_questions, job_requirements
+             )
+             if error_message:
+                 return ProcessingResult(error_message=error_message)
+
+             # Process inputs
+             video_transcript = self.process_video(video_path)
+             emotion_analysis = self.analyze_emotions(video_path)
+             resume_analysis = self.process_resume(resume_path)
+
+             feedback_list = self.get_feedback(
+                 interview_questions,
+                 job_requirements,
+                 emotion_analysis,
+                 video_transcript,
+                 resume_analysis,
+             )
+
+             # Update feedback database
+             self.candidate_feedback = pd.concat(
+                 [self.candidate_feedback, feedback_list], ignore_index=True
+             )
+
+             # TODO: For testing purposes
+             # job_title = "LLM Engineer"
+             # interview_questions = INTERVIEW_QUESTION
+             # job_requirements = JOB_REQUIREMENTS
+             # self.candidate_feedback = pd.DataFrame(
+             #     {
+             #         "Name": ["Goh Yi Xian"] * 4,
+             #         "Score": [50, 50, 50, 50],
+             #         "Feedback": [
+             #             "The interviewee's technical skills align partially with the job requirements, showcasing proficiency in deep learning frameworks like PyTorch and TensorFlow. However, there is a lack of experience in training and fine-tuning transformer-based models and working with MLOps tools for deployment.",
+             #             "The educational background meets the criteria with a Bachelor's degree in Computer Science, but the lack of a Ph.D. and limited industry experience may hinder full alignment with the role.",
+             #             "The interview performance indicates a need for improvement in problem-solving skills, confidence, and engagement. The response lacked clarity, relevance, and demonstrated understanding of the key aspects of the job requirements.",
+             #             "Overall, while there are some matching skills and experiences, the interviewee falls short in demonstrating a comprehensive fit for the LLM Engineer position. Further development in technical expertise, problem-solving abilities, and communication skills is recommended.",
+             #         ],
+             #     }
+             # )
+
+             write_user_data(
+                 self.candidate_feedback["Name"].iloc[0],
+                 self.candidate_feedback["Score"].iloc[0],
+                 interview_questions,
+                 job_title,
+                 job_requirements,
+                 self.candidate_feedback["Feedback"].tolist(),
+             )
+
+             feedback_md = self.format_feedback_to_markdown(self.candidate_feedback)
+
+             return ProcessingResult(
+                 candidate_name=self.candidate_feedback["Name"].iloc[0],
+                 candidate_score=self.candidate_feedback["Score"].iloc[0],
+                 candidate_feedbacks=self.candidate_feedback["Feedback"].tolist(),
+                 feedback_md=feedback_md,
+                 interview_question=interview_questions,
+                 job_requirements=job_requirements,
+             )
+
+         except Exception as e:
+             self.logger.error(f"Error in process_submission: {str(e)}")
+             return ProcessingResult(
+                 error_message=f"An error occurred during processing: {str(e)}"
+             )
+
+     def save_report(
+         self,
+         candidate_name,
+         candidate_score,
+         candidate_feedback,
+         interview_question,
+         job_requirements,
+     ) -> Optional[str]:
+         try:
+             if self.candidate_feedback.empty:
+                 return None
+
+             doc = Document()
+             doc.add_heading(f"Interview Analysis Report - {candidate_name}", 0)
+             doc.add_heading("Interview Questions", 1)
+             doc.add_paragraph(interview_question)
+             doc.add_heading("Job Requirements", 1)
+             doc.add_paragraph(job_requirements)
+             doc.add_heading("Overall Score", 1)
+             paragraph = doc.add_paragraph()
+             paragraph.add_run(f"{candidate_score}/100").bold = True
+             doc.add_heading("Detailed Feedback", 1)
+
+             for feedback in candidate_feedback:
+                 doc.add_paragraph(f"• {feedback}")
+
+             doc.save(str(OUTPUT_REPORT_FILE_EMPTY))
+             return str(OUTPUT_REPORT_FILE_EMPTY)
+
+         except Exception as e:
+             self.logger.error(f"Error saving report: {str(e)}")
+             return None
+
+     def create_interface(self) -> gr.Blocks:
+
+         theme = gr.themes.Ocean(
+             primary_hue="pink",
+             secondary_hue="rose",
+             font="Chalkboard",
+         )
+
+         with gr.Blocks(title="HR Interview Analysis System", theme=theme) as demo:
+             gr.Markdown("# HR Interview Analysis System")
+
+             with gr.Row():
+                 with gr.Column():
+                     video_input = gr.Video(label="Upload Interview Video", format="mp4")
+                     resume_input = gr.File(
+                         label="Upload Resume (PDF)", file_types=[".pdf"]
+                     )
+
+             with gr.Row():
+                 question_input = gr.Textbox(
+                     label="Interview Questions",
+                     lines=5,
+                     placeholder="Enter the interview questions here...",
+                 )
+
+             with gr.Row():
+                 job_title_input = gr.Textbox(
+                     label="Job Title",
+                     lines=5,
+                     placeholder="Enter the job title here...",
+                 )
+                 requirements_input = gr.Textbox(
+                     label="Job Requirements",
+                     lines=5,
+                     placeholder="Enter the job requirements here...",
+                 )
+
+             submit_button = gr.Button("Analyze Interview", variant="primary")
+
+             # Error message display
+             error_output = gr.Markdown(visible=False)
+
+             with gr.Tabs():
+                 with gr.Tab("Analysis Results"):
+                     feedback_output_md = gr.Markdown(
+                         label="Candidate Assessment",
+                         value="No assessment available yet.",
+                     )
+
+                     save_button = gr.Button("Generate Report", variant="secondary")
+                     report_output = gr.File(label="Download Report")
+
+                 with gr.Tab("Candidates List"):
+                     candidates_df = gr.Dataframe(
+                         headers=[
+                             "Name",
+                             "Job Title",
+                             "Interview Question",
+                             "Score",
+                             "Feedback",
+                         ],
+                         datatype=["str", "str", "str", "int", "str"],
+                         row_count=(0, "dynamic"),
+                         col_count=(5, "fixed"),
+                         value=read_all_users(),  # Load initial data
+                         interactive=True,
+                         wrap=True,
+                     )
+
+                     refresh_button = gr.Button("Refresh Candidates List")
+
+                     refresh_button.click(
+                         fn=lambda: read_all_users(),  # Reload the candidates data
+                         inputs=[],
+                         outputs=[candidates_df],
+                     )
+
+             candidate_name_state = gr.State()
+             candidate_score_state = gr.State()
+             candidate_feedbacks_state = gr.State()
+             interview_question_state = gr.State()
+             job_requirements_state = gr.State()
+
+             # Event handlers
+             submit_button.click(
+                 fn=lambda video, resume, questions, job_title, requirements: (
+                     lambda result: (
+                         result.candidate_name,
+                         result.candidate_score,
+                         result.candidate_feedbacks,
+                         result.feedback_md,
+                         result.interview_question,
+                         result.job_requirements,
+                         result.error_message,
+                     )
+                 )(
+                     self.process_submission(
+                         video, resume, questions, job_title, requirements
+                     )
+                 ),
+                 inputs=[
+                     video_input,
+                     resume_input,
+                     question_input,
+                     job_title_input,
+                     requirements_input,
+                 ],
+                 outputs=[
+                     candidate_name_state,
+                     candidate_score_state,
+                     candidate_feedbacks_state,
+                     feedback_output_md,
+                     interview_question_state,
+                     job_requirements_state,
+                     error_output,
+                 ],
+             )
+
+             save_button.click(
+                 fn=self.save_report,
+                 inputs=[
+                     candidate_name_state,
+                     candidate_score_state,
+                     candidate_feedbacks_state,
+                     interview_question_state,
+                     job_requirements_state,
+                 ],
+                 outputs=[report_output],
+             )
+
+         return demo
+
+
+ def launch_app():
+     app = GradioInterface()
+     interface = app.create_interface()
+     interface.launch(server_name="0.0.0.0", server_port=7860, share=True, debug=True)
+
+
+ if __name__ == "__main__":
+     launch_app()
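
The double lambda in the submit handler above exists only to unpack the single ProcessingResult returned by process_submission into the seven Gradio outputs. The same method can be driven without the UI; the sketch below is illustrative, with hypothetical file paths, and assumes the process is started from the directory where configs/ resolves and that the API keys expected by .env are present:

from src.app import GradioInterface

app = GradioInterface()
result = app.process_submission(
    video_path="samples/interview.mp4",   # hypothetical path
    resume_path="samples/resume.pdf",     # hypothetical path
    interview_questions="Tell us about a recent project.",
    job_title="LLM Engineer",
    job_requirements="3+ years of NLP experience.",
)
# Errors are returned on the dataclass rather than raised to the caller.
if result.error_message:
    print(result.error_message)
else:
    print(result.feedback_md)
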
src/archive/__pycache__/main_test.cpython-312-pytest-8.3.4.pyc ADDED
Binary file (3.71 kB).
 
src/archive/__pycache__/sample_inputs.cpython-312.pyc ADDED
Binary file (5.16 kB).
 
src/archive/main_test.py ADDED
@@ -0,0 +1,97 @@
+ from dotenv import load_dotenv
+ from docx import Document
+
+ from src.llm.llm import get_llm
+ from src.service.resume_parser import ResumeParser
+ from src.service.emotion_recognition import EmotionRecognition
+ from src.utils.utils import (
+     extract_audio,
+     audio2text,
+     sample_frames,
+     parse_yaml_string,
+ )
+ from src.template.grading_prompt import (
+     GRADE_RESPONSE_PROMPT,
+     RANKING_AND_FEEDBACK_PROMPT,
+ )
+
+ # sample input values
+ from src.archive.sample_inputs import (
+     VIDEO_PATH,
+     RESUME_PATH,
+     INTERVIEW_QUESTION,
+     JOB_REQUIREMENTS,
+ )
+
+
+ # customise this part
+ LLM_CONFIG_FILE = "./src/configs/llm/openai-gpt-3.5-turbo.yaml"
+ # LLM_CONFIG_FILE = "./src/configs/llm/openai-gpt-4o-mini.yaml"
+ # LLM_CONFIG_FILE = "./src/configs/llm/nvidia-llama-3.1-nemotron-70b-instruct.yaml"
+
+ RESUME_PARSER_CONFIG_FILE = "./src/configs/parser/llamaparse_en.yaml"
+ OUTPUT_AUDIO_FILE = "/Users/gohyixian/Downloads/test_cases/outputs/audio_output.wav"  # only supports .wav
+ OUTPUT_REPORT_FILE = "/Users/gohyixian/Downloads/test_cases/outputs/report.docx"
+
+ # init API keys as env variables
+ load_dotenv()
+
+ # init LLM & resume parser
+ llm = get_llm(LLM_CONFIG_FILE)
+ parser = ResumeParser(RESUME_PARSER_CONFIG_FILE)
+
+
+ # 1. extract audio from video
+ OUTPUT_AUDIO_FILE = extract_audio(VIDEO_PATH, OUTPUT_AUDIO_FILE)
+ assert OUTPUT_AUDIO_FILE is not None, "Audio extraction failed."
+
+ # 2. audio to text
+ audio_text = audio2text(OUTPUT_AUDIO_FILE)
+ print(audio_text)
+
+ # 3. extract frames from video
+ frames = sample_frames(VIDEO_PATH, sample_rate=8)
+ print(frames)
+
+ # 4. deepface extract emotions & compute confidence scores
+ emotions = EmotionRecognition.detect_face_emotions(frames)
+ emotions_dict = EmotionRecognition.process_emotions(emotions)
+ conf_score = emotions_dict["conf"]
+ print(emotions_dict)
+
+ # 5. llamaparse parse resume into MD
+ resume_md = parser.parse_resume_to_markdown(RESUME_PATH)
+ print(resume_md)
+
+ # 6. llm grade question response
+ formatted_grading_prompt = GRADE_RESPONSE_PROMPT.format(
+     interview_question=INTERVIEW_QUESTION,
+     conf_score=conf_score,
+     response_text=audio_text,
+ )
+ grade = llm.complete(formatted_grading_prompt)
+ print(grade)
+
+ # 7. llm rank and output final feedback
+ formatted_ranking_prompt = RANKING_AND_FEEDBACK_PROMPT.format(
+     job_requirements=JOB_REQUIREMENTS, interview_feedback=grade, resume_text=resume_md
+ )
+ rank_and_feedback = llm.complete(formatted_ranking_prompt)
+ print(rank_and_feedback)
+
+
+ # 8. save to .docx report
+ expected_keys = ["name", "score", "feedback"]
+ rank_and_feedback_dict = parse_yaml_string(
+     yaml_string=rank_and_feedback, expected_keys=expected_keys, cleanup=True
+ )
+ print(rank_and_feedback_dict)
+
+ doc = Document()
+ doc.add_heading(f"{rank_and_feedback_dict['name']}", 0)
+ doc.add_heading(f"Overall Score: {rank_and_feedback_dict['score']}", 1)
+ doc.add_heading("Brief Overview", 1)
+ doc.add_paragraph(f"{rank_and_feedback_dict['feedback']}")
+
+ # Save the document
+ doc.save(OUTPUT_REPORT_FILE)
src/archive/sample_inputs.py ADDED
@@ -0,0 +1,98 @@
+ RESUME_PATH = "/Users/gohyixian/Downloads/test_cases/CV_2024_24_JUN.pdf"
+
+ VIDEO_PATH = "/Users/gohyixian/Downloads/test_cases/test.mp4"
+
+ INTERVIEW_QUESTION = """
+ Can you describe a project where you fine-tuned a transformer-based model (e.g., BERT, GPT, or T5) for a specific application?
+ Walk us through your approach to dataset preparation, model optimization, and deployment.
+ How did you handle challenges like ensuring the model's performance, scalability, and fairness?
+ """
+
+ JOB_REQUIREMENTS = """
+ Job Title: LLM Engineer
+
+ Job Description:
+ ################
+ - We are seeking a skilled and innovative LLM Engineer to join our AI team. The ideal candidate will
+   have hands-on experience in developing, fine-tuning, and deploying large language models (LLMs) for
+   various applications. You will collaborate with cross-functional teams to deliver cutting-edge AI
+   solutions, leveraging your expertise in natural language processing (NLP), deep learning, and
+   large-scale systems.
+
+
+ Key Responsibilities
+ ####################
+ 1. Model Development:
+    - Design and fine-tune large language models (e.g., GPT, LLaMA, or similar) for tasks like text generation,
+      summarization, question answering, and classification.
+    - Implement advanced techniques for model optimization, including pruning, quantization, and distillation.
+
+ 2. Data Management:
+    - Curate, preprocess, and manage large datasets for training and evaluation.
+    - Ensure data quality by cleaning, augmenting, and annotating datasets.
+
+ 3. Infrastructure & Deployment:
+    - Build scalable pipelines for training and deploying LLMs using frameworks like PyTorch, TensorFlow, or JAX.
+    - Optimize inference speed and memory usage for production-grade applications.
+
+ 4. Model Evaluation:
+    - Develop benchmarks to evaluate model performance, fairness, and safety.
+    - Implement guardrails to mitigate bias and ensure ethical use of AI systems.
+
+ 5. Collaboration:
+    - Work closely with product managers, data scientists, and software engineers to align model capabilities with business requirements.
+    - Provide mentorship to junior team members and contribute to knowledge sharing within the team.
+
+ 6. Research & Innovation:
+    - Stay updated on the latest research in NLP and deep learning.
+    - Contribute to academic papers, patents, or open-source projects where appropriate.
+
+
+ Requirements
+ ############
+ 1. Technical Skills:
+    - Strong programming skills in Python.
+    - Proficiency with deep learning frameworks (e.g., PyTorch, TensorFlow, JAX).
+    - Experience in training and fine-tuning transformer-based models (e.g., BERT, GPT, T5).
+    - Familiarity with distributed training techniques and tools like Horovod or DeepSpeed.
+    - Knowledge of vector databases and retrieval-augmented generation (RAG) techniques.
+    - Hands-on experience with MLOps tools (e.g., MLflow, Docker, Kubernetes) for deployment.
+    - Expertise in working with APIs for integrating LLMs into production systems.
+
+ 2. Educational Background:
+    - Bachelor’s or Master’s degree in Computer Science, Artificial Intelligence, Data Science, or a related field. Ph.D. preferred but not required.
+
+ 3. Experience:
+    - 3+ years of experience in NLP, machine learning, or a related field.
+    - Demonstrated success in building and deploying LLM-powered applications.
+    - Contributions to open-source projects or research publications in NLP are a plus.
+
+ 4. Soft Skills:
+    - Strong problem-solving abilities and attention to detail.
+    - Excellent communication and collaboration skills to work with cross-functional teams.
+    - Adaptable, with a passion for continuous learning and innovation.
+    - A proactive and goal-oriented mindset.
+
+ 5. Target Personalities:
+    - Innovative Thinker: Always exploring new ways to improve model performance and usability.
+    - Team Player: Collaborates effectively across diverse teams to deliver AI solutions.
+    - Ethically Minded: Committed to ensuring the ethical and fair use of AI technologies.
+    - Detail-Oriented: Meticulous in coding, data handling, and model evaluation.
+    - Resilient Learner: Thrives in a fast-paced environment, keeping up with advancements in AI research.
+
+
+ Preferred Qualifications:
+ #########################
+ - Experience with foundation model APIs (e.g., OpenAI, Hugging Face).
+ - Knowledge of reinforcement learning techniques, particularly RLHF (Reinforcement Learning with Human Feedback).
+ - Familiarity with multi-modal LLMs and their integration.
+ - Experience working in cloud environments like AWS, Azure, or GCP.
+ - Contributions to community forums, blogs, or conferences related to LLMs or NLP.
+
+ What We Offer
+ #############
+ - Competitive salary and benefits package.
+ - Opportunities to work on groundbreaking AI projects.
+ - Flexible work environment, including remote options.
+ - Access to cutting-edge resources and infrastructure for AI development.
+ """
src/configs/database/.DS_Store ADDED
Binary file (6.15 kB).
 
src/configs/database/__pycache__/firebase.cpython-312.pyc ADDED
Binary file (6.66 kB).
 
src/configs/database/firebase.py ADDED
@@ -0,0 +1,175 @@
+ import firebase_admin
+ from firebase_admin import credentials, db
+ import json
+ import os
+ import pandas as pd
+ import numpy as np
+ from dotenv import load_dotenv
+ from pathlib import Path
+ from uuid_extensions import uuid7
+ from datetime import datetime
+
+ env_path = Path(__file__).resolve().parent.parent.parent.parent / ".env"
+ load_dotenv(dotenv_path=env_path)
+
+ firebase_service_key_str = os.getenv("FIREBASE_API_KEY")
+
+ if not firebase_service_key_str:
+     raise ValueError("Service account key is not set in the environment variables.")
+
+ service_account_key = json.loads(firebase_service_key_str)
+
+ service_account_key_path = "/tmp/serviceAccountKey.json"
+ with open(service_account_key_path, "w") as temp_key_file:
+     json.dump(service_account_key, temp_key_file)
+
+ os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = service_account_key_path
+
+ if not firebase_admin._apps:  # Check if already initialized
+     cred = credentials.Certificate(service_account_key_path)
+     firebase_admin.initialize_app(
+         cred,
+         {
+             "databaseURL": "https://automated-interview-filtering-default-rtdb.asia-southeast1.firebasedatabase.app"
+         },
+     )
+
+ ref = db.reference("interview_results/")
+ users_ref = ref.child("users")
+
+
+ def write_user_data(
+     name, score, interview_question, job_title, job_requirements, feedback
+ ):
+     """
+     Writes user data to Firebase database with UUID v7.
+
+     Args:
+     :param name: Name of the user
+     :param score: Interview score
+     :param interview_question: Question asked during interview
+     :param job_title: Job title
+     :param job_requirements: Job requirements
+     :param feedback: Feedback for the user
+
+     Returns:
+     :return: UUID of the newly created record
+     """
+     try:
+         # Generate UUID v7
+         entry_id = str(uuid7())
+         timestamp = datetime.now().isoformat()
+
+         if isinstance(score, np.int64):
+             score = int(score)
+         elif isinstance(score, (float, np.float64)):
+             score = int(round(score))
+
+         user_data = {
+             "id": entry_id,
+             "name": name,
+             "score": score,
+             "interview_question": interview_question,
+             "job_title": job_title,
+             "job_requirements": job_requirements,
+             "feedback": feedback,
+             "created_at": timestamp,
+             "updated_at": timestamp,
+         }
+
+         # Create a new entry using the UUID as the key
+         users_ref.child(entry_id).set(user_data)
+         print(f"Data for {name} successfully written to Firebase with ID: {entry_id}")
+         return entry_id
+
+     except Exception as e:
+         print(f"Error writing data to Firebase: {str(e)}")
+         raise
+
+
+ def read_all_users():
+     """
+     Reads all user data from Firebase database and returns it as a pandas DataFrame.
+
+     Returns:
+     :return pandas.DataFrame: DataFrame containing all user records with Firebase keys as index
+     """
+     try:
+         users = users_ref.get()
+         if not users:
+             print("No users found in the database.")
+             return pd.DataFrame()
+
+         # Convert Firebase data to DataFrame
+         df = pd.DataFrame.from_dict(users, orient="index")
+
+         # Reset index and rename it to 'firebase_key'
+         df = df.reset_index().rename(columns={"index": "firebase_key"})
+
+         # Reorder columns to put id and timestamps first
+         preferred_order = [
+             "firebase_key",
+             "id",
+             "created_at",
+             "updated_at",
+             "name",
+             "score",
+             "interview_question",
+             "job_title",
+             "job_requirements",
+             "feedback",
+         ]
+         actual_columns = [col for col in preferred_order if col in df.columns]
+         remaining_columns = [col for col in df.columns if col not in preferred_order]
+         df = df[actual_columns + remaining_columns]
+
+         # Convert timestamps to datetime
+         if "created_at" in df.columns:
+             df["created_at"] = pd.to_datetime(df["created_at"])
+         if "updated_at" in df.columns:
+             df["updated_at"] = pd.to_datetime(df["updated_at"])
+
+         # flatten the List[str] feedback into a single str
+         df["feedback"] = df["feedback"].apply(lambda x: " ".join(x))
+
+         df_filtered = df[
+             ["name", "job_title", "interview_question", "score", "feedback"]
+         ]
+
+         return df_filtered
+
+     except Exception as e:
+         print(f"Error reading data from Firebase: {str(e)}")
+         raise
+
+
+ def update_user_data(uuid, update_dict):
+     """
+     Updates existing user data in Firebase database.
+
+     Args:
+     :param update_dict: Dictionary containing fields to update
+     :param uuid: UUID of the record to update
+
+     Returns:
+     :return bool: True if update successful, False otherwise
+     """
+     try:
+         # Get current data
+         current_data = users_ref.child(uuid).get()
+
+         if not current_data:
+             print(f"No record found with UUID: {uuid}")
+             return False
+
+         # Update the timestamp
+         update_dict["updated_at"] = datetime.now().isoformat()
+
+         # Update only the specified fields
+         users_ref.child(uuid).update(update_dict)
+         print(f"Successfully updated record with UUID: {uuid}")
+         return True
+
+     except Exception as e:
+         print(f"Error updating data in Firebase: {str(e)}")
+         raise
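
Note that firebase.py initializes the Firebase app at import time, so importing any of these helpers already requires FIREBASE_API_KEY to hold the full service-account JSON. A minimal round-trip sketch, assuming a reachable Realtime Database and hypothetical candidate data:

from src.configs.database.firebase import (
    write_user_data, read_all_users, update_user_data
)

# write_user_data returns the generated UUID v7 record key
entry_id = write_user_data(
    name="Jane Doe",  # hypothetical candidate
    score=72,
    interview_question="Describe a fine-tuning project.",
    job_title="LLM Engineer",
    job_requirements="3+ years of NLP experience.",
    feedback=["Strong fundamentals.", "Limited MLOps exposure."],
)

# Partial update by key; only the given fields (plus updated_at) change
update_user_data(entry_id, {"score": 75})

# Returns the filtered DataFrame the Gradio candidates table displays
print(read_all_users())
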
src/configs/llm/nvidia-llama-3.1-nemotron-70b-instruct.yaml ADDED
@@ -0,0 +1,4 @@
+ PROVIDER: nvidia
+ BASE_URL: https://integrate.api.nvidia.com/v1
+ MODEL: nvidia/llama-3.1-nemotron-70b-instruct
+ TEMPERATURE: 0
src/configs/llm/openai-gpt-3.5-turbo.yaml ADDED
@@ -0,0 +1,4 @@
+ PROVIDER: openai
+ BASE_URL: default
+ MODEL: gpt-3.5-turbo
+ TEMPERATURE: 0
src/configs/llm/openai-gpt-4o-mini.yaml ADDED
@@ -0,0 +1,4 @@
+ PROVIDER: openai
+ BASE_URL: default
+ MODEL: gpt-4o-mini
+ TEMPERATURE: 0
src/configs/parser/llamaparse_en.yaml ADDED
@@ -0,0 +1,7 @@
+ LANGUAGE: en
+ DISABLE_OCR: false
+ PAGE_ROC_BBOX:
+   TOP: 0
+   RIGHT: 0
+   BOTTOM: 0
+   LEFT: 0
src/domain/candidate.py ADDED
@@ -0,0 +1,13 @@
+ from dataclasses import dataclass
+ from typing import Dict, List
+
+
+ @dataclass
+ class Candidate:
+     id: str
+     name: str
+     email: str
+     resume_data: Dict
+     interview_responses: List[str]
+     emotional_metrics: Dict
+     feedback: Dict
Binary file (1.17 kB).
 
src/domain/enums/emotion_types.py ADDED
@@ -0,0 +1,21 @@
+ from enum import Enum
+
+
+ class EmotionType(Enum):
+
+     SAD = "sad"
+     FEAR = "fear"
+     ANGRY = "angry"
+     DISGUST = "disgust"
+
+     HAPPY = "happy"
+     NEUTRAL = "neutral"
+     SURPRISE = "surprise"
+
+     @classmethod
+     def get_positive_emotions(cls):
+         return [cls.HAPPY, cls.NEUTRAL, cls.SURPRISE]
+
+     @classmethod
+     def get_negative_emotions(cls):
+         return [cls.SAD, cls.FEAR, cls.ANGRY, cls.DISGUST]
src/domain/enums/interview_status.py ADDED
@@ -0,0 +1,11 @@
+ from enum import Enum, auto
+
+
+ class InterviewStatus(Enum):
+     SCHEDULED = auto()
+     IN_PROGRESS = auto()
+     COMPLETED = auto()
+     CANCELLED = auto()
+     PENDING_REVIEW = auto()
+     REVIEWED = auto()
+     FAILED = auto()
src/domain/interview.py ADDED
@@ -0,0 +1,28 @@
+ from dataclasses import dataclass
+ from datetime import datetime
+ from typing import List, Dict
+ from src.domain.enums.interview_status import InterviewStatus
+ from src.domain.enums.emotion_types import EmotionType
+
+
+ @dataclass
+ class Interview:
+     id: str
+     candidate_id: str
+     job_id: str
+     video_path: str
+     status: InterviewStatus
+     questions: List[str]
+     responses_transcription: List[str]
+     timestamp: datetime
+     duration: int
+     emotional_analysis: Dict[EmotionType, float]
+
+     def is_completed(self) -> bool:
+         return self.status == InterviewStatus.COMPLETED
+
+     def is_reviewable(self) -> bool:
+         return self.status in [
+             InterviewStatus.COMPLETED,
+             InterviewStatus.PENDING_REVIEW,
+         ]
src/llm/__pycache__/base_llm_provider.cpython-312.pyc ADDED
Binary file (996 Bytes).
 
src/llm/__pycache__/enums.cpython-312.pyc ADDED
Binary file (274 Bytes).
 
src/llm/__pycache__/llm.cpython-312.pyc ADDED
Binary file (1.25 kB).
 
src/llm/__pycache__/nvidia_llm.cpython-312.pyc ADDED
Binary file (1.4 kB).
 
src/llm/__pycache__/openai_llm.cpython-312.pyc ADDED
Binary file (1.34 kB).
 
src/llm/base_llm_provider.py ADDED
@@ -0,0 +1,16 @@
+ """Base class for LLM providers"""
+
+ from abc import abstractmethod
+ from typing import Dict, Optional
+
+
+ class BaseLLMProvider:
+     @abstractmethod
+     def __init__(self):
+         """LLM provider initialization"""
+         raise NotImplementedError
+
+     @abstractmethod
+     def complete(self, prompt: str = "") -> str:
+         """LLM chat completion implementation by each provider"""
+         raise NotImplementedError
src/llm/enums.py ADDED
@@ -0,0 +1,3 @@
+ OPENAI_LLM = "openai"
+ NVIDIA_LLM = "nvidia"
+ DEFAULT_LLM_API_BASE = "default"
src/llm/llm.py ADDED
@@ -0,0 +1,32 @@
+ import yaml
+
+ from src.llm.enums import OPENAI_LLM, NVIDIA_LLM
+ from src.llm.base_llm_provider import BaseLLMProvider
+ from src.llm.openai_llm import OpenAILLM
+ from src.llm.nvidia_llm import NvidiaLLM
+
+
+ def get_llm(config_file_path: str = "config.yaml") -> BaseLLMProvider:
+     """
+     Initiates LLM client from config file
+     """
+
+     # load config
+     with open(config_file_path, "r") as f:
+         config = yaml.safe_load(f)
+
+     # init & return llm
+     if config["PROVIDER"] == OPENAI_LLM:
+         return OpenAILLM(
+             model=config["MODEL"],
+             temperature=config["TEMPERATURE"],
+             base_url=config["BASE_URL"],
+         )
+     elif config["PROVIDER"] == NVIDIA_LLM:
+         return NvidiaLLM(
+             model=config["MODEL"],
+             temperature=config["TEMPERATURE"],
+             base_url=config["BASE_URL"],
+         )
+     else:
+         raise ValueError(f"Unsupported LLM provider: {config['PROVIDER']}")
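
get_llm is the only factory the rest of the code calls: it reads PROVIDER, MODEL, TEMPERATURE, and BASE_URL from one of the YAML configs above and dispatches to OpenAILLM or NvidiaLLM. A minimal usage sketch, assuming the matching API key (e.g., OPENAI_API_KEY) is available in .env:

from dotenv import load_dotenv
from src.llm.llm import get_llm

load_dotenv()  # the provider's API key is assumed to be set here

# Same config path main_test.py uses; swap in the NVIDIA YAML to switch providers
llm = get_llm("./src/configs/llm/openai-gpt-3.5-turbo.yaml")
print(llm.complete("Summarize this candidate's strengths in one sentence: ..."))
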
src/llm/nvidia_llm.py ADDED
@@ -0,0 +1,29 @@
+ """NVIDIA LLM Implementation"""
+
+ from llama_index.llms.nvidia import NVIDIA
+
+ from src.llm.base_llm_provider import BaseLLMProvider
+ from src.llm.enums import DEFAULT_LLM_API_BASE
+
+
+ class NvidiaLLM(BaseLLMProvider):
+     def __init__(
+         self,
+         model: str = "nvidia/llama-3.1-nemotron-70b-instruct",
+         temperature: float = 0.0,
+         base_url: str = "https://integrate.api.nvidia.com/v1",
+     ):
+         """Initiate NVIDIA client"""
+
+         if base_url == DEFAULT_LLM_API_BASE:
+             self._client = NVIDIA(
+                 model=model,
+                 temperature=temperature,
+             )
+         else:
+             self._client = NVIDIA(
+                 model=model, temperature=temperature, base_url=base_url
+             )
+
+     def complete(self, prompt: str = "") -> str:
+         return str(self._client.complete(prompt))
src/llm/openai_llm.py ADDED
@@ -0,0 +1,29 @@
+ """OpenAI LLM Implementation"""
+
+ from llama_index.llms.openai import OpenAI
+
+ from src.llm.base_llm_provider import BaseLLMProvider
+ from src.llm.enums import DEFAULT_LLM_API_BASE
+
+
+ class OpenAILLM(BaseLLMProvider):
+     def __init__(
+         self,
+         model: str = "gpt-4o-mini",
+         temperature: float = 0.0,
+         base_url: str = DEFAULT_LLM_API_BASE,
+     ):
+         """Initiate OpenAI client"""
+
+         if base_url == DEFAULT_LLM_API_BASE:
+             self._client = OpenAI(
+                 model=model,
+                 temperature=temperature,
+             )
+         else:
+             self._client = OpenAI(
+                 model=model, temperature=temperature, base_url=base_url
+             )
+
+     def complete(self, prompt: str = "") -> str:
+         return str(self._client.complete(prompt))
src/output/.DS_Store ADDED
Binary file (6.15 kB).
 
src/output/report.docx ADDED
Binary file (39.4 kB).
 
src/service/__pycache__/emotion_recognition.cpython-312.pyc ADDED
Binary file (7.31 kB).
 
src/service/__pycache__/resume_parser.cpython-312.pyc ADDED
Binary file (2.12 kB).
 
src/service/emotion_recognition.py ADDED
@@ -0,0 +1,136 @@
+ import numpy as np
+ from deepface import DeepFace
+
+ from src.domain.enums.emotion_types import EmotionType
+
+
+ class EmotionRecognition:
+     def __init__(self):
+         pass
+
+     @classmethod
+     def detect_face_emotions(cls, frames: list[np.ndarray] = None) -> list:
+         """
+         Performs facial emotion detection using the DeepFace model
+         """
+         emotions = []
+         for frame in frames:
+             frame_result = DeepFace.analyze(
+                 frame, actions=["emotion"], enforce_detection=False
+             )
+             emotions.append(frame_result)
+
+         return emotions
+
+     @classmethod
+     def process_emotions(cls, emotions: list) -> dict:
+         """
+         Processes the emotions by calculating the overall confidence score using a
+         custom weighted emotion balancing algorithm.
+
+         Returns:
+         - weighted normalized score
+         - signed, weighted normalized score
+         - confidence score
+         """
+
+         count = 0
+         emots = {
+             str(EmotionType.SAD.value): 0,
+             str(EmotionType.FEAR.value): 0,
+             str(EmotionType.ANGRY.value): 0,
+             str(EmotionType.DISGUST.value): 0,
+             str(EmotionType.HAPPY.value): 0,
+             str(EmotionType.NEUTRAL.value): 0,
+             str(EmotionType.SURPRISE.value): 0,
+         }
+
+         for frame_result in emotions:
+             if len(frame_result) > 0:
+                 emot = frame_result[0]["emotion"]
+                 emots[str(EmotionType.SAD.value)] = (
+                     emots.get(str(EmotionType.SAD.value), 0)
+                     + emot[str(EmotionType.SAD.value)]
+                 )
+                 emots[str(EmotionType.FEAR.value)] = (
+                     emots.get(str(EmotionType.FEAR.value), 0)
+                     + emot[str(EmotionType.FEAR.value)]
+                 )
+                 emots[str(EmotionType.ANGRY.value)] = (
+                     emots.get(str(EmotionType.ANGRY.value), 0)
+                     + emot[str(EmotionType.ANGRY.value)]
+                 )
+                 emots[str(EmotionType.DISGUST.value)] = (
+                     emots.get(str(EmotionType.DISGUST.value), 0)
+                     + emot[str(EmotionType.DISGUST.value)]
+                 )
+                 emots[str(EmotionType.HAPPY.value)] = (
+                     emots.get(str(EmotionType.HAPPY.value), 0)
+                     + emot[str(EmotionType.HAPPY.value)]
+                 )
+                 emots[str(EmotionType.NEUTRAL.value)] = (
+                     emots.get(str(EmotionType.NEUTRAL.value), 0)
+                     + emot[str(EmotionType.NEUTRAL.value)]
+                 )
+                 emots[str(EmotionType.SURPRISE.value)] = (
+                     emots.get(str(EmotionType.SURPRISE.value), 0)
+                     + emot[str(EmotionType.SURPRISE.value)]
+                 )
+                 count += 1
+
+         # prevent zero division
+         if count == 0:
+             count = 1
+
+         for i in list(emots.keys()):
+             emots[i] /= count * 100
+
+         # refactor according to custom weightage
+         sad_score = emots[str(EmotionType.SAD.value)] * 1.3
+         fear_score = emots[str(EmotionType.FEAR.value)] * 1.3
+         angry_score = emots[str(EmotionType.ANGRY.value)] * 1.3
+         disgust_score = emots[str(EmotionType.DISGUST.value)] * 10
+         happy_score = emots[str(EmotionType.HAPPY.value)] * 1.7
+         neutral_score = emots[str(EmotionType.NEUTRAL.value)] / 1.2
+         surprise_score = emots[str(EmotionType.SURPRISE.value)] * 1.4
+
+         score_list = [
+             sad_score,
+             angry_score,
+             surprise_score,
+             fear_score,
+             happy_score,
+             disgust_score,
+             neutral_score,
+         ]
+         normalized_scores = cls.__normalize_scores(score_list)
+         mean = np.mean(normalized_scores)
+
+         result_scores = [
+             (-sad_score),
+             (-angry_score),
+             surprise_score,
+             (-fear_score),
+             happy_score,
+             (-disgust_score),
+             neutral_score,
+         ]
+         normalized_result_scores = cls.__normalize_scores(result_scores)
+         result = np.mean(normalized_result_scores)
+
+         difference = abs((mean - result) / mean) * 100
+
+         # keep values in range of [0, 100]
+         difference = min(difference, 50)
+
+         if mean > result:
+             conf = 50 - difference
+         else:
+             conf = 50 + difference
+
+         return {"mean": mean, "result": result, "conf": conf}
+
+     @classmethod
+     def __normalize_scores(cls, scores: list) -> list:
+         min_val, max_val = min(scores), max(scores)
+         return [(score - min_val) / (max_val - min_val) for score in scores]
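
The confidence score above is easiest to follow on concrete numbers: per-emotion percentages are averaged over frames, re-weighted, min-max normalized twice (once unsigned, once with negative emotions sign-flipped), and the relative gap between the two means is mapped into [0, 100] around a midpoint of 50. A self-contained sketch with one synthetic frame in the list-of-face-dicts shape that DeepFace.analyze() returns:

from src.service.emotion_recognition import EmotionRecognition

# One synthetic "frame": a list of detected-face dicts, each carrying
# per-emotion percentages that sum to roughly 100.
fake_frame = [{
    "emotion": {
        "sad": 5.0, "fear": 2.0, "angry": 1.0, "disgust": 0.5,
        "happy": 55.0, "neutral": 33.5, "surprise": 3.0,
    }
}]

scores = EmotionRecognition.process_emotions([fake_frame])
# Positive emotions dominate here, so the signed mean ("result") exceeds the
# unsigned mean ("mean") and conf lands above the neutral midpoint of 50
# (roughly 59 for these numbers).
print(scores["conf"])
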
src/service/resume_parser.py ADDED
@@ -0,0 +1,42 @@
+ import yaml
+ from llama_parse import LlamaParse
+ from llama_index.core import SimpleDirectoryReader
+
+ from src.template.parser_prompt import PARSE_RESUME_PROMPT
+
+
+ class ResumeParser:
+     def __init__(self, config_file_path: str = "config.yaml"):
+         """
+         Initiates a resume parser client
+         """
+
+         # load config
+         with open(config_file_path, "r") as f:
+             config = yaml.safe_load(f)
+
+         # set bbox size
+         bbox_margin = config["PAGE_ROC_BBOX"]
+         bbox = f"{bbox_margin['TOP']},{bbox_margin['RIGHT']},{bbox_margin['BOTTOM']},{bbox_margin['LEFT']}"
+
+         self._parser = LlamaParse(
+             language=config["LANGUAGE"],
+             disable_ocr=config["DISABLE_OCR"],
+             bounding_box=bbox,
+             result_type="markdown",
+             parsing_instruction=PARSE_RESUME_PROMPT,
+             is_formatting_instruction=False,
+         )
+
+     def parse_resume_to_markdown(self, resume_path: str = "") -> str:
+         """
+         Parses the resume into markdown text.
+
+         Supported filetypes:
+         - .pdf
+         """
+         document = SimpleDirectoryReader(
+             input_files=[resume_path], file_extractor={".pdf": self._parser}
+         ).load_data()
+
+         return "\n".join([str(d.text) for d in document])
src/template/__pycache__/grading_prompt.cpython-312.pyc ADDED
Binary file (4.54 kB).
 
src/template/__pycache__/parser_prompt.cpython-312.pyc ADDED
Binary file (1.59 kB).
 
src/template/grading_prompt.py ADDED
@@ -0,0 +1,111 @@
+ from llama_index.core.prompts import PromptTemplate
+
+ GRADE_RESPONSE_PROMPT = PromptTemplate(
+     """
+ You are a Human Resource Manager and an interviewer.
+ Your task is to review an interviewee's overall performance based on multiple factors.
+ You will be provided with the interview question, the interviewee's facial confidence score, their response to the question in text form, and additional context on the interview.
+
+ The confidence score will range from 0 to 100, and you will also receive the text of their answers to the interview question.
+ Based on this information, evaluate the interviewee’s performance in the following areas:
+
+ 1. **Answer Quality**:
+    Assess the clarity, relevance, and accuracy of their response to the interview question.
+    Did the interviewee address the key points effectively?
+
+ 2. **Problem-Solving Skills**:
+    Evaluate how well the interviewee tackled any problem presented in the interview question.
+    Were they able to think critically, analyze the situation, and propose solutions?
+
+ 3. **Confidence**:
+    Based on their facial confidence score (0 to 100) and their overall demeanor in the response, rate their confidence level and how it impacts their presentation and communication.
+
+ 4. **Personality**:
+    Consider the tone, communication style, and interpersonal skills of the interviewee.
+    How well did they engage with the question and the interview process?
+    Do they demonstrate qualities like openness, empathy, or assertiveness?
+
+ 5. **Overall Performance**:
+    Based on the combination of the above factors, provide a holistic evaluation of their performance in the interview.
+    Offer feedback on strengths and areas for improvement.
+
+ Ensure that your feedback is clear and actionable, so other HR professionals reviewing the interview can easily assess the interviewee's suitability for the position.
+
+
+ ########################################
+ Interview Question:
+ {interview_question}
+
+ ########################################
+ Interviewee's Facial Confidence Score:
+ {conf_score}
+
+ ########################################
+ Interviewee's response in text:
+ {response_text}
+
+ ########################################
+ output:
+ """
+ )
+
+
+ RANKING_AND_FEEDBACK_PROMPT = PromptTemplate(
+     """
+ You are an HR specialist evaluating an interviewee for a specific role.
+ Your task is to assess the suitability of the interviewee based on the following information:
+
+ 1. **Job Requirements**:
+    A list of skills, experiences, and qualifications required for the role.
+
+ 2. **Interview Feedback**:
+    The feedback and review of the interviewee’s performance in the interview, which includes assessments on their answer quality, problem-solving skills, confidence, personality, and overall performance.
+
+ 3. **Resume Text**:
+    A parsed version of the interviewee's resume, which includes their work experience, skills, education, and other relevant information.
+
+ Using these inputs, generate an output strictly in the following YAML format:
+
+ ###########################
+ name: <name>
+ score: <score>
+ feedback: <feedback text>
+ ###########################
+
+
+ Details for the output:
+ 1. **name**:
+    Name of the interviewee.
+
+ 2. **score**:
+    A score ranging from 0 to 100, where 0 means the interviewee is not recommended for the position, and 100 means they are a perfect match for the job.
+
+ 3. **feedback**:
+    - A detailed breakdown explaining how the interviewee’s experience, skills, and performance align or do not align with the job requirements.
+    - Discuss whether the interviewee’s skills, experiences, and overall traits match or fail to meet the required qualifications.
+    - Provide a short, concise sentence summarizing the interviewee’s suitability for the role.
+
+ Ensure that the feedback is comprehensive yet concise, offering actionable insights for HR professionals to make a decision about the interviewee’s fit for the role.
+
+
+ ########################################
+ Job Requirements:
+ {job_requirements}
+
+ ########################################
+ Interview Feedback:
+ {interview_feedback}
+
+ ########################################
+ Resume Text:
+ {resume_text}
+
+ ########################################
+
+ Output strictly following the below YAML format:
+
+ name: <name>
+ score: <score>
+ feedback: <feedback text>
+ """
+ )
src/template/parser_prompt.py ADDED
@@ -0,0 +1,21 @@
+ from llama_index.core.prompts import PromptTemplate
+
+ PARSE_RESUME_PROMPT = """
+ You are tasked with parsing a resume.
+
+ **Your Focus**:
+ - Reproduce only the main body text, including section headers and bullet points, exactly as received.
+ - Do not skip section numbers in the format DIGIT.DIGIT (e.g., 10.1, 3.1); you must apply a markdown header level based on the depth (e.g., # for main sections, ## for subsections) to reflect the appropriate hierarchy, and output them.
+ - Do make sure that section numbers are always followed by the corresponding section title without a '\n' character in between or separating them into different headers. Valid examples are as below:
+   - '# 14 Experience'
+   - '# 2 Education'
+   Invalid examples are as below:
+   - '# 14\n # Experience'
+   - '# 2\n # Education'
+ - You may only add markdown header symbols (#, ##, ###, etc.) to denote the hierarchical levels of section headers.
+ - Do not make up any text or headers that are not present in the original text.
+
+ **Expected Output**:
+ - Text, section headers, and bullet points must be reproduced without any text edits, additions, or deletions, other than adding markdown header symbols (#, ##, ###, etc.).
+ - Use markdown headers to denote additional hierarchy (e.g., # for main sections, ## for subsections) based on the best interpretation of the document’s structure.
+ """
src/utils/__pycache__/utils.cpython-312.pyc ADDED
Binary file (3.94 kB).
 
src/utils/utils.py ADDED
@@ -0,0 +1,103 @@
+ import cv2
+ import yaml
+ import numpy as np
+ from pathlib import Path
+ import speech_recognition as sr
+ from moviepy import VideoFileClip
+
+
+ def extract_audio(
+     input_video_file: str = "",
+     output_audio_file: str = "",
+ ) -> str:
+     """
+     Extracts audio from the input video file and saves it to the given path.
+     Returns the path to the saved audio file if extraction is successful.
+     Supported input video file formats are:
+     - .mp4
+     - .mov
+
+     Supported output audio file formats are:
+     - .wav
+     """
+     try:
+         input_video_file = str(Path(input_video_file))
+         output_audio_file = str(Path(output_audio_file))
+
+         # Load the video file
+         video = VideoFileClip(input_video_file)
+
+         # Extract audio and write to output file
+         video.audio.write_audiofile(output_audio_file)
+
+         print(f"[extract_audio()] : Audio extracted and saved to {output_audio_file}")
+
+         return output_audio_file
+     except Exception as e:
+         print(e)
+         return None
+
+
+ def audio2text(audio_file: str = "") -> str:
+     """
+     Converts audio to text using Google's speech-to-text engine (local client),
+     and returns the text.
+     """
+     r = sr.Recognizer()
+     with sr.AudioFile(audio_file) as source:
+         audio = r.record(source)
+     text = r.recognize_google(audio)
+     return text
+
+
+ def sample_frames(input_video_file: str = "", sample_rate: int = 2) -> list[np.ndarray]:
+     """
+     Samples one frame every 'sample_rate' frames from the video file and returns
+     them in the form of a list of Numpy ndarray objects.
+     """
+     cap = cv2.VideoCapture(input_video_file)
+     frames = []
+     count = 0
+
+     while cap.isOpened():
+         ret, frame = cap.read()
+         if not ret:
+             break
+         if count % sample_rate == 0:
+             frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+         count += 1
+     cap.release()
+
+     return frames
+
+
+ def parse_yaml_string(
+     yaml_string: str = "", expected_keys: list[str] = None, cleanup: bool = True
+ ) -> dict:
+     """
+     Parses a YAML string into a Python dictionary based on a list of
+     expected keys.
+     """
+
+     # removes ```YAML ``` heading and footers if present
+     if cleanup:
+         yaml_string = yaml_string.replace("YAML", "")
+         yaml_string = yaml_string.replace("yaml", "")
+         yaml_string = yaml_string.replace("`", "")
+
+     try:
+         parsed_data = yaml.safe_load(yaml_string)
+
+         # Handle missing keys with error handling
+         result = {}
+         for key in expected_keys:
+             if key in parsed_data:
+                 result[key] = parsed_data[key]
+             else:
+                 print(f"[parse_yaml_string()] : Missing key {key}")
+
+         return result
+
+     except KeyError as e:
+         print(e)
+         return None
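
parse_yaml_string is the glue between the free-form LLM output and the structured name/score/feedback record; the cleanup flag strips the ```yaml fences that models often wrap around their answers. A self-contained example (note the blunt replace() calls would also mangle any literal "yaml" inside field values, a known trade-off of this approach):

from src.utils.utils import parse_yaml_string

llm_output = """```yaml
name: Jane Doe
score: 75
feedback: Solid fundamentals, limited MLOps exposure.
```"""

parsed = parse_yaml_string(
    yaml_string=llm_output,
    expected_keys=["name", "score", "feedback"],
    cleanup=True,  # strips the fence characters before yaml.safe_load()
)
print(parsed)  # {'name': 'Jane Doe', 'score': 75, 'feedback': 'Solid fundamentals, limited MLOps exposure.'}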