RainPoo committed
Commit 1ff6afc · verified · 1 Parent(s): 50406c0

Upload 41 files

Files changed (41)
  1. src/.DS_Store +0 -0
  2. src/.gradio/certificate.pem +31 -0
  3. src/app.py +432 -0
  4. src/archive/__pycache__/main_test.cpython-312-pytest-8.3.4.pyc +0 -0
  5. src/archive/__pycache__/sample_inputs.cpython-312.pyc +0 -0
  6. src/archive/main_test.py +97 -0
  7. src/archive/sample_inputs.py +98 -0
  8. src/configs/database/.DS_Store +0 -0
  9. src/configs/database/__pycache__/firebase.cpython-312.pyc +0 -0
  10. src/configs/database/firebase.py +175 -0
  11. src/configs/llm/nvidia-llama-3.1-nemotron-70b-instruct.yaml +4 -0
  12. src/configs/llm/openai-gpt-3.5-turbo.yaml +4 -0
  13. src/configs/llm/openai-gpt-4o-mini.yaml +4 -0
  14. src/configs/parser/llamaparse_en.yaml +7 -0
  15. src/domain/candidate.py +13 -0
  16. src/domain/enums/__pycache__/emotion_types.cpython-312.pyc +0 -0
  17. src/domain/enums/emotion_types.py +21 -0
  18. src/domain/enums/interview_status.py +11 -0
  19. src/domain/interview.py +28 -0
  20. src/llm/__pycache__/base_llm_provider.cpython-312.pyc +0 -0
  21. src/llm/__pycache__/enums.cpython-312.pyc +0 -0
  22. src/llm/__pycache__/llm.cpython-312.pyc +0 -0
  23. src/llm/__pycache__/nvidia_llm.cpython-312.pyc +0 -0
  24. src/llm/__pycache__/openai_llm.cpython-312.pyc +0 -0
  25. src/llm/base_llm_provider.py +16 -0
  26. src/llm/enums.py +3 -0
  27. src/llm/llm.py +32 -0
  28. src/llm/nvidia_llm.py +29 -0
  29. src/llm/openai_llm.py +29 -0
  30. src/output/.DS_Store +0 -0
  31. src/output/report.docx +0 -0
  32. src/service/__pycache__/emotion_recognition.cpython-312.pyc +0 -0
  33. src/service/__pycache__/resume_parser.cpython-312.pyc +0 -0
  34. src/service/emotion_recognition.py +136 -0
  35. src/service/resume_parser.py +42 -0
  36. src/template/__pycache__/grading_prompt.cpython-312.pyc +0 -0
  37. src/template/__pycache__/parser_prompt.cpython-312.pyc +0 -0
  38. src/template/grading_prompt.py +111 -0
  39. src/template/parser_prompt.py +21 -0
  40. src/utils/__pycache__/utils.cpython-312.pyc +0 -0
  41. src/utils/utils.py +103 -0
src/.DS_Store ADDED
Binary file (6.15 kB).
 
src/.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
+ -----BEGIN CERTIFICATE-----
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+ -----END CERTIFICATE-----
src/app.py ADDED
@@ -0,0 +1,432 @@
+ import gradio as gr
+ import pandas as pd
+ import logging
+ from pathlib import Path
+ from docx import Document
+ from typing import Optional, List
+ from dataclasses import dataclass
+ from dotenv import load_dotenv
+ from src.archive.sample_inputs import INTERVIEW_QUESTION, JOB_REQUIREMENTS
+ from src.configs.database.firebase import write_user_data, read_all_users
+ from src.llm.llm import get_llm
+ from src.service.emotion_recognition import EmotionRecognition
+ from src.service.resume_parser import ResumeParser
+ from src.utils.utils import (
+     parse_yaml_string,
+     extract_audio,
+     audio2text,
+     sample_frames,
+ )
+ from src.template.grading_prompt import (
+     GRADE_RESPONSE_PROMPT,
+     RANKING_AND_FEEDBACK_PROMPT,
+ )
+
+ load_dotenv()
+ # ENVIRONMENT = os.getenv("ENVIRONMENT", "local")
+
+ # Define base paths dynamically
+ # if ENVIRONMENT == "local":
+ #     BASE_DIR = Path(__file__).resolve().parent.parent.parent.parent
+ # else:  # Assume hosted on Hugging Face Spaces
+ BASE_DIR = Path(".").resolve()
+
+ LLM_CONFIG_FILE = BASE_DIR / "configs/llm/openai-gpt-3.5-turbo.yaml"
+ RESUME_PARSER_CONFIG_FILE = BASE_DIR / "configs/parser/llamaparse_en.yaml"
+ OUTPUT_AUDIO_FILE_EMPTY = BASE_DIR / "output/audio_output.wav"
+ OUTPUT_REPORT_FILE_EMPTY = BASE_DIR / "output/report.docx"
+
+
+ @dataclass
+ class ProcessingResult:
+     candidate_name: Optional[str] = None
+     candidate_score: Optional[int] = None
+     candidate_feedbacks: Optional[List[str]] = None
+     feedback_md: Optional[str] = None
+     interview_question: Optional[str] = None
+     job_requirements: Optional[str] = None
+     error_message: Optional[str] = None
+
+
+ class GradioInterface:
+     VALID_VIDEO_EXTENSIONS = {".mp4", ".avi", ".mkv"}
+     VALID_RESUME_EXTENSIONS = {".pdf"}
+
+     def __init__(self):
+         self.parser = None
+         self.llm = None
+         self.logger = None
+         self.candidate_feedback = pd.DataFrame(columns=["Name", "Score", "Feedback"])
+         self.setup_logging()
+         self.initialize_services()
+
+     def setup_logging(self):
+         logging.basicConfig(
+             level=logging.INFO,
+             format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+         )
+         self.logger = logging.getLogger(__name__)
+
+     def initialize_services(self):
+         try:
+             self.llm = get_llm(str(LLM_CONFIG_FILE))
+             self.parser = ResumeParser(str(RESUME_PARSER_CONFIG_FILE))
+         except Exception as e:
+             self.logger.error(f"Failed to initialize services: {str(e)}")
+             raise
+
+     def validate_inputs(
+         self,
+         video_path: Optional[str],
+         resume_path: Optional[str],
+         interview_questions: Optional[str],
+         job_requirements: Optional[str],
+     ) -> Optional[str]:
+         if not video_path:
+             return "Please upload an interview video."
+         if not resume_path:
+             return "Please upload a resume (PDF)."
+         if not interview_questions:
+             return "Please provide interview questions."
+         if not job_requirements:
+             return "Please provide job requirements."
+         if not self._validate_file_format(video_path, self.VALID_VIDEO_EXTENSIONS):
+             return "Invalid video format."
+         if not self._validate_file_format(resume_path, self.VALID_RESUME_EXTENSIONS):
+             return "Please submit resume in PDF format."
+         return None
+
+     def _validate_file_format(self, file_path: str, valid_extensions: set) -> bool:
+         return isinstance(file_path, str) and any(
+             file_path.lower().endswith(ext) for ext in valid_extensions
+         )
+
+     def process_video(self, video_path: str) -> Optional[str]:
+         OUTPUT_AUDIO_FILE = extract_audio(video_path, str(OUTPUT_AUDIO_FILE_EMPTY))
+         audio_text = audio2text(OUTPUT_AUDIO_FILE)
+         return audio_text
+
+     def analyze_emotions(self, video_path: str) -> Optional[str]:
+         frames = sample_frames(video_path, sample_rate=8)
+         emotions = EmotionRecognition.detect_face_emotions(frames)
+         emotions_dict = EmotionRecognition.process_emotions(emotions)
+         conf_score = emotions_dict["conf"]
+         return conf_score
+
+     def process_resume(self, resume_path: str) -> Optional[str]:
+         resume_md = self.parser.parse_resume_to_markdown(resume_path)
+         return resume_md
+
+     def format_feedback_to_markdown(self, feedback_df: pd.DataFrame) -> str:
+         if feedback_df.empty:
+             return "No feedback available."
+
+         name = feedback_df["Name"].iloc[0]
+         score = feedback_df["Score"].iloc[0]
+
+         # Start with header
+         markdown_text = f"""
+ # Candidate Assessment Report 📝
+
+ ## Candidate Name ✨
+ {name}
+
+ ## Candidate Overall Score 🎯
+ {score}/100
+
+ ## Detailed Feedback 🛠️
+ """
+
+         for idx, row in feedback_df.iterrows():
+             markdown_text += f"- {row['Feedback']}\n\n"
+
+         return markdown_text
+
+     def get_feedback(
+         self,
+         itv_question: str,
+         job_requirements: str,
+         conf_score: str,
+         audio_text: str,
+         resume_md: str,
+     ) -> pd.DataFrame:
+
+         formatted_grading_prompt = GRADE_RESPONSE_PROMPT.format(
+             interview_question=itv_question,
+             conf_score=conf_score,
+             response_text=audio_text,
+         )
+
+         grade = self.llm.complete(formatted_grading_prompt)
+
+         formatted_ranking_prompt = RANKING_AND_FEEDBACK_PROMPT.format(
+             job_requirements=job_requirements,
+             interview_feedback=grade,
+             resume_text=resume_md,
+         )
+         rank_and_feedback = self.llm.complete(formatted_ranking_prompt)
+
+         expected_keys = ["name", "score", "feedback"]
+         rank_and_feedback_dict = parse_yaml_string(
+             yaml_string=rank_and_feedback, expected_keys=expected_keys, cleanup=True
+         )
+
+         return pd.DataFrame(
+             {
+                 "Name": rank_and_feedback_dict["name"],
+                 "Score": rank_and_feedback_dict["score"],
+                 "Feedback": rank_and_feedback_dict["feedback"],
+             }
+         )
+
+     def process_submission(
+         self,
+         video_path: str,
+         resume_path: str,
+         interview_questions: str,
+         job_title: str,
+         job_requirements: str,
+     ) -> ProcessingResult:
+         try:
+             # Validate inputs
+             error_message = self.validate_inputs(
+                 video_path, resume_path, interview_questions, job_requirements
+             )
+             if error_message:
+                 return ProcessingResult(error_message=error_message)
+
+             # Process inputs
+             video_transcript = self.process_video(video_path)
+             emotion_analysis = self.analyze_emotions(video_path)
+             resume_analysis = self.process_resume(resume_path)
+
+             feedback_list = self.get_feedback(
+                 interview_questions,
+                 job_requirements,
+                 emotion_analysis,
+                 video_transcript,
+                 resume_analysis,
+             )
+
+             # Update feedback database
+             self.candidate_feedback = pd.concat(
+                 [self.candidate_feedback, feedback_list], ignore_index=True
+             )
+
+             # TODO: For testing purposes
+             # job_title = "LLM Engineer"
+             # interview_questions = INTERVIEW_QUESTION
+             # job_requirements = JOB_REQUIREMENTS
+             # self.candidate_feedback = pd.DataFrame(
+             #     {
+             #         "Name": ["Goh Yi Xian"] * 4,
+             #         "Score": [50, 50, 50, 50],
+             #         "Feedback": [
+             #             "The interviewee's technical skills align partially with the job requirements, showcasing proficiency in deep learning frameworks like PyTorch and TensorFlow. However, there is a lack of experience in training and fine-tuning transformer-based models and working with MLOps tools for deployment.",
+             #             "The educational background meets the criteria with a Bachelor's degree in Computer Science, but the lack of a Ph.D. and limited industry experience may hinder full alignment with the role.",
+             #             "The interview performance indicates a need for improvement in problem-solving skills, confidence, and engagement. The response lacked clarity, relevance, and demonstrated understanding of the key aspects of the job requirements.",
+             #             "Overall, while there are some matching skills and experiences, the interviewee falls short in demonstrating a comprehensive fit for the LLM Engineer position. Further development in technical expertise, problem-solving abilities, and communication skills is recommended.",
+             #         ],
+             #     }
+             # )
+
+             write_user_data(
+                 self.candidate_feedback["Name"].iloc[0],
+                 self.candidate_feedback["Score"].iloc[0],
+                 interview_questions,
+                 job_title,
+                 job_requirements,
+                 self.candidate_feedback["Feedback"].tolist(),
+             )
+
+             feedback_md = self.format_feedback_to_markdown(self.candidate_feedback)
+
+             return ProcessingResult(
+                 candidate_name=self.candidate_feedback["Name"].iloc[0],
+                 candidate_score=self.candidate_feedback["Score"].iloc[0],
+                 candidate_feedbacks=self.candidate_feedback["Feedback"].tolist(),
+                 feedback_md=feedback_md,
+                 interview_question=interview_questions,
+                 job_requirements=job_requirements,
+             )
+
+         except Exception as e:
+             self.logger.error(f"Error in process_submission: {str(e)}")
+             return ProcessingResult(
+                 error_message=f"An error occurred during processing: {str(e)}"
+             )
+
+     def save_report(
+         self,
+         candidate_name,
+         candidate_score,
+         candidate_feedback,
+         interview_question,
+         job_requirements,
+     ) -> Optional[str]:
+         try:
+             if self.candidate_feedback.empty:
+                 return None
+
+             doc = Document()
+             doc.add_heading(f"Interview Analysis Report - {candidate_name}", 0)
+             doc.add_heading("Interview Questions", 1)
+             doc.add_paragraph(interview_question)
+             doc.add_heading("Job Requirements", 1)
+             doc.add_paragraph(job_requirements)
+             doc.add_heading("Overall Score", 1)
+             paragraph = doc.add_paragraph()
+             paragraph.add_run(f"{candidate_score}/100").bold = True
+             doc.add_heading("Detailed Feedback", 1)
+
+             for feedback in candidate_feedback:
+                 doc.add_paragraph(f"• {feedback}")
+
+             doc.save(str(OUTPUT_REPORT_FILE_EMPTY))
+             return str(OUTPUT_REPORT_FILE_EMPTY)
+
+         except Exception as e:
+             self.logger.error(f"Error saving report: {str(e)}")
+             return None
+
+     def create_interface(self) -> gr.Blocks:
+
+         theme = gr.themes.Ocean(
+             primary_hue="pink",
+             secondary_hue="rose",
+             font="Chalkboard",
+         )
+
+         with gr.Blocks(title="HR Interview Analysis System", theme=theme) as demo:
+             gr.Markdown("# HR Interview Analysis System")
+
+             with gr.Row():
+                 with gr.Column():
+                     video_input = gr.Video(label="Upload Interview Video", format="mp4")
+                     resume_input = gr.File(
+                         label="Upload Resume (PDF)", file_types=[".pdf"]
+                     )
+
+             with gr.Row():
+                 question_input = gr.Textbox(
+                     label="Interview Questions",
+                     lines=5,
+                     placeholder="Enter the interview questions here...",
+                 )
+
+             with gr.Row():
+                 job_title_input = gr.Textbox(
+                     label="Job Title",
+                     lines=5,
+                     placeholder="Enter the job title here...",
+                 )
+                 requirements_input = gr.Textbox(
+                     label="Job Requirements",
+                     lines=5,
+                     placeholder="Enter the job requirements here...",
+                 )
+
+             submit_button = gr.Button("Analyze Interview", variant="primary")
+
+             # Error message display
+             error_output = gr.Markdown(visible=False)
+
+             with gr.Tabs():
+                 with gr.Tab("Analysis Results"):
+                     feedback_output_md = gr.Markdown(
+                         label="Candidate Assessment",
+                         value="No assessment available yet.",
+                     )
+
+                     save_button = gr.Button("Generate Report", variant="secondary")
+                     report_output = gr.File(label="Download Report")
+
+                 with gr.Tab("Candidates List"):
+                     candidates_df = gr.Dataframe(
+                         headers=[
+                             "Name",
+                             "Job Title",
+                             "Interview Question",
+                             "Score",
+                             "Feedback",
+                         ],
+                         datatype=["str", "str", "str", "int", "str"],
+                         row_count=(0, "dynamic"),
+                         col_count=(5, "fixed"),
+                         value=read_all_users(),  # Load initial data
+                         interactive=True,
+                         wrap=True,
+                     )
+
+                     refresh_button = gr.Button("Refresh Candidates List")
+
+                     refresh_button.click(
+                         fn=lambda: read_all_users(),  # Reload the candidates data
+                         inputs=[],
+                         outputs=[candidates_df],
+                     )
+
+             candidate_name_state = gr.State()
+             candidate_score_state = gr.State()
+             candidate_feedbacks_state = gr.State()
+             interview_question_state = gr.State()
+             job_requirements_state = gr.State()
+
+             # Event handlers
+             submit_button.click(
+                 fn=lambda video, resume, questions, job_title, requirements: (
+                     lambda result: (
+                         result.candidate_name,
+                         result.candidate_score,
+                         result.candidate_feedbacks,
+                         result.feedback_md,
+                         result.interview_question,
+                         result.job_requirements,
+                         result.error_message,
+                     )
+                 )(
+                     self.process_submission(
+                         video, resume, questions, job_title, requirements
+                     )
+                 ),
+                 inputs=[
+                     video_input,
+                     resume_input,
+                     question_input,
+                     job_title_input,
+                     requirements_input,
+                 ],
+                 outputs=[
+                     candidate_name_state,
+                     candidate_score_state,
+                     candidate_feedbacks_state,
+                     feedback_output_md,
+                     interview_question_state,
+                     job_requirements_state,
+                     error_output,
+                 ],
+             )
+
+             save_button.click(
+                 fn=self.save_report,
+                 inputs=[
+                     candidate_name_state,
+                     candidate_score_state,
+                     candidate_feedbacks_state,
+                     interview_question_state,
+                     job_requirements_state,
+                 ],
+                 outputs=[report_output],
+             )
+
+         return demo
+
+
+ def launch_app():
+     app = GradioInterface()
+     interface = app.create_interface()
+     interface.launch(server_name="0.0.0.0", server_port=7860, share=True, debug=True)
+
+
+ if __name__ == "__main__":
+     launch_app()
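
The double lambda in the submit handler above exists only to unpack the single ProcessingResult returned by process_submission into the seven Gradio outputs. The same method can be driven without the UI; the sketch below is illustrative, with hypothetical file paths, and assumes the process is started from the directory where configs/ resolves and that the API keys expected by .env are present:

from src.app import GradioInterface

app = GradioInterface()
result = app.process_submission(
    video_path="samples/interview.mp4",   # hypothetical path
    resume_path="samples/resume.pdf",     # hypothetical path
    interview_questions="Tell us about a recent project.",
    job_title="LLM Engineer",
    job_requirements="3+ years of NLP experience.",
)
# Errors are returned on the dataclass rather than raised to the caller.
if result.error_message:
    print(result.error_message)
else:
    print(result.feedback_md)
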
src/archive/__pycache__/main_test.cpython-312-pytest-8.3.4.pyc ADDED
Binary file (3.71 kB).
 
src/archive/__pycache__/sample_inputs.cpython-312.pyc ADDED
Binary file (5.16 kB).
 
src/archive/main_test.py ADDED
@@ -0,0 +1,97 @@
+ from dotenv import load_dotenv
+ from docx import Document
+
+ from src.llm.llm import get_llm
+ from src.service.resume_parser import ResumeParser
+ from src.service.emotion_recognition import EmotionRecognition
+ from src.utils.utils import (
+     extract_audio,
+     audio2text,
+     sample_frames,
+     parse_yaml_string,
+ )
+ from src.template.grading_prompt import (
+     GRADE_RESPONSE_PROMPT,
+     RANKING_AND_FEEDBACK_PROMPT,
+ )
+
+ # sample input values
+ from src.archive.sample_inputs import (
+     VIDEO_PATH,
+     RESUME_PATH,
+     INTERVIEW_QUESTION,
+     JOB_REQUIREMENTS,
+ )
+
+
+ # customise this part
+ LLM_CONFIG_FILE = "./src/configs/llm/openai-gpt-3.5-turbo.yaml"
+ # LLM_CONFIG_FILE = "./src/configs/llm/openai-gpt-4o-mini.yaml"
+ # LLM_CONFIG_FILE = "./src/configs/llm/nvidia-llama-3.1-nemotron-70b-instruct.yaml"
+
+ RESUME_PARSER_CONFIG_FILE = "./src/configs/parser/llamaparse_en.yaml"
+ OUTPUT_AUDIO_FILE = "/Users/gohyixian/Downloads/test_cases/outputs/audio_output.wav"  # only supports .wav
+ OUTPUT_REPORT_FILE = "/Users/gohyixian/Downloads/test_cases/outputs/report.docx"
+
+ # init API keys as env variables
+ load_dotenv()
+
+ # init LLM & resume parser
+ llm = get_llm(LLM_CONFIG_FILE)
+ parser = ResumeParser(RESUME_PARSER_CONFIG_FILE)
+
+
+ # 1. extract audio from video
+ OUTPUT_AUDIO_FILE = extract_audio(VIDEO_PATH, OUTPUT_AUDIO_FILE)
+ assert OUTPUT_AUDIO_FILE is not None, "Audio extraction failed."
+
+ # 2. audio to text
+ audio_text = audio2text(OUTPUT_AUDIO_FILE)
+ print(audio_text)
+
+ # 3. extract frames from video
+ frames = sample_frames(VIDEO_PATH, sample_rate=8)
+ print(frames)
+
+ # 4. deepface extract emotions & compute confidence scores
+ emotions = EmotionRecognition.detect_face_emotions(frames)
+ emotions_dict = EmotionRecognition.process_emotions(emotions)
+ conf_score = emotions_dict["conf"]
+ print(emotions_dict)
+
+ # 5. llamaparse parse resume into MD
+ resume_md = parser.parse_resume_to_markdown(RESUME_PATH)
+ print(resume_md)
+
+ # 6. llm grade question response
+ formatted_grading_prompt = GRADE_RESPONSE_PROMPT.format(
+     interview_question=INTERVIEW_QUESTION,
+     conf_score=conf_score,
+     response_text=audio_text,
+ )
+ grade = llm.complete(formatted_grading_prompt)
+ print(grade)
+
+ # 7. llm rank and output final feedback
+ formatted_ranking_prompt = RANKING_AND_FEEDBACK_PROMPT.format(
+     job_requirements=JOB_REQUIREMENTS, interview_feedback=grade, resume_text=resume_md
+ )
+ rank_and_feedback = llm.complete(formatted_ranking_prompt)
+ print(rank_and_feedback)
+
+
+ # 8. save to .docx report
+ expected_keys = ["name", "score", "feedback"]
+ rank_and_feedback_dict = parse_yaml_string(
+     yaml_string=rank_and_feedback, expected_keys=expected_keys, cleanup=True
+ )
+ print(rank_and_feedback_dict)
+
+ doc = Document()
+ doc.add_heading(f"{rank_and_feedback_dict['name']}", 0)
+ doc.add_heading(f"Overall Score: {rank_and_feedback_dict['score']}", 1)
+ doc.add_heading("Brief Overview", 1)
+ doc.add_paragraph(f"{rank_and_feedback_dict['feedback']}")
+
+ # Save the document
+ doc.save(OUTPUT_REPORT_FILE)
src/archive/sample_inputs.py ADDED
@@ -0,0 +1,98 @@
+ RESUME_PATH = "/Users/gohyixian/Downloads/test_cases/CV_2024_24_JUN.pdf"
+
+ VIDEO_PATH = "/Users/gohyixian/Downloads/test_cases/test.mp4"
+
+ INTERVIEW_QUESTION = """
+ Can you describe a project where you fine-tuned a transformer-based model (e.g., BERT, GPT, or T5) for a specific application?
+ Walk us through your approach to dataset preparation, model optimization, and deployment.
+ How did you handle challenges like ensuring the model's performance, scalability, and fairness?
+ """
+
+ JOB_REQUIREMENTS = """
+ Job Title: LLM Engineer
+
+ Job Description:
+ ################
+ - We are seeking a skilled and innovative LLM Engineer to join our AI team. The ideal candidate will
+   have hands-on experience in developing, fine-tuning, and deploying large language models (LLMs) for
+   various applications. You will collaborate with cross-functional teams to deliver cutting-edge AI
+   solutions, leveraging your expertise in natural language processing (NLP), deep learning, and
+   large-scale systems.
+
+
+ Key Responsibilities
+ ####################
+ 1. Model Development:
+    - Design and fine-tune large language models (e.g., GPT, LLaMA, or similar) for tasks like text generation,
+      summarization, question answering, and classification.
+    - Implement advanced techniques for model optimization, including pruning, quantization, and distillation.
+
+ 2. Data Management:
+    - Curate, preprocess, and manage large datasets for training and evaluation.
+    - Ensure data quality by cleaning, augmenting, and annotating datasets.
+
+ 3. Infrastructure & Deployment:
+    - Build scalable pipelines for training and deploying LLMs using frameworks like PyTorch, TensorFlow, or JAX.
+    - Optimize inference speed and memory usage for production-grade applications.
+
+ 4. Model Evaluation:
+    - Develop benchmarks to evaluate model performance, fairness, and safety.
+    - Implement guardrails to mitigate bias and ensure ethical use of AI systems.
+
+ 5. Collaboration:
+    - Work closely with product managers, data scientists, and software engineers to align model capabilities with business requirements.
+    - Provide mentorship to junior team members and contribute to knowledge sharing within the team.
+
+ 6. Research & Innovation:
+    - Stay updated on the latest research in NLP and deep learning.
+    - Contribute to academic papers, patents, or open-source projects where appropriate.
+
+
+ Requirements
+ ############
+ 1. Technical Skills:
+    - Strong programming skills in Python.
+    - Proficiency with deep learning frameworks (e.g., PyTorch, TensorFlow, JAX).
+    - Experience in training and fine-tuning transformer-based models (e.g., BERT, GPT, T5).
+    - Familiarity with distributed training techniques and tools like Horovod or DeepSpeed.
+    - Knowledge of vector databases and retrieval-augmented generation (RAG) techniques.
+    - Hands-on experience with MLOps tools (e.g., MLflow, Docker, Kubernetes) for deployment.
+    - Expertise in working with APIs for integrating LLMs into production systems.
+
+ 2. Educational Background:
+    - Bachelor’s or Master’s degree in Computer Science, Artificial Intelligence, Data Science, or a related field. Ph.D. preferred but not required.
+
+ 3. Experience:
+    - 3+ years of experience in NLP, machine learning, or a related field.
+    - Demonstrated success in building and deploying LLM-powered applications.
+    - Contributions to open-source projects or research publications in NLP are a plus.
+
+ 4. Soft Skills:
+    - Strong problem-solving abilities and attention to detail.
+    - Excellent communication and collaboration skills to work with cross-functional teams.
+    - Adaptable, with a passion for continuous learning and innovation.
+    - A proactive and goal-oriented mindset.
+
+ 5. Target Personalities:
+    - Innovative Thinker: Always exploring new ways to improve model performance and usability.
+    - Team Player: Collaborates effectively across diverse teams to deliver AI solutions.
+    - Ethically Minded: Committed to ensuring the ethical and fair use of AI technologies.
+    - Detail-Oriented: Meticulous in coding, data handling, and model evaluation.
+    - Resilient Learner: Thrives in a fast-paced environment, keeping up with advancements in AI research.
+
+
+ Preferred Qualifications:
+ #########################
+ - Experience with foundation model APIs (e.g., OpenAI, Hugging Face).
+ - Knowledge of reinforcement learning techniques, particularly RLHF (Reinforcement Learning with Human Feedback).
+ - Familiarity with multi-modal LLMs and their integration.
+ - Experience working in cloud environments like AWS, Azure, or GCP.
+ - Contributions to community forums, blogs, or conferences related to LLMs or NLP.
+
+ What We Offer
+ #############
+ - Competitive salary and benefits package.
+ - Opportunities to work on groundbreaking AI projects.
+ - Flexible work environment, including remote options.
+ - Access to cutting-edge resources and infrastructure for AI development.
+ """
src/configs/database/.DS_Store ADDED
Binary file (6.15 kB).
 
src/configs/database/__pycache__/firebase.cpython-312.pyc ADDED
Binary file (6.66 kB).
 
src/configs/database/firebase.py ADDED
@@ -0,0 +1,175 @@
+ import firebase_admin
+ from firebase_admin import credentials, db
+ import json
+ import os
+ import pandas as pd
+ import numpy as np
+ from dotenv import load_dotenv
+ from pathlib import Path
+ from uuid_extensions import uuid7
+ from datetime import datetime
+
+ env_path = Path(__file__).resolve().parent.parent.parent.parent / ".env"
+ load_dotenv(dotenv_path=env_path)
+
+ firebase_service_key_str = os.getenv("FIREBASE_API_KEY")
+
+ if not firebase_service_key_str:
+     raise ValueError("Service account key is not set in the environment variables.")
+
+ service_account_key = json.loads(firebase_service_key_str)
+
+ service_account_key_path = "/tmp/serviceAccountKey.json"
+ with open(service_account_key_path, "w") as temp_key_file:
+     json.dump(service_account_key, temp_key_file)
+
+ os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = service_account_key_path
+
+ if not firebase_admin._apps:  # Check if already initialized
+     cred = credentials.Certificate(service_account_key_path)
+     firebase_admin.initialize_app(
+         cred,
+         {
+             "databaseURL": "https://automated-interview-filtering-default-rtdb.asia-southeast1.firebasedatabase.app"
+         },
+     )
+
+ ref = db.reference("interview_results/")
+ users_ref = ref.child("users")
+
+
+ def write_user_data(
+     name, score, interview_question, job_title, job_requirements, feedback
+ ):
+     """
+     Writes user data to Firebase database with UUID v7.
+
+     Args:
+     :param name: Name of the user
+     :param score: Interview score
+     :param interview_question: Question asked during interview
+     :param job_title: Job title
+     :param job_requirements: Job requirements
+     :param feedback: Feedback for the user
+
+     Returns:
+     :return: UUID of the newly created record
+     """
+     try:
+         # Generate UUID v7
+         entry_id = str(uuid7())
+         timestamp = datetime.now().isoformat()
+
+         if isinstance(score, np.int64):
+             score = int(score)
+         elif isinstance(score, (float, np.float64)):
+             score = int(round(score))
+
+         user_data = {
+             "id": entry_id,
+             "name": name,
+             "score": score,
+             "interview_question": interview_question,
+             "job_title": job_title,
+             "job_requirements": job_requirements,
+             "feedback": feedback,
+             "created_at": timestamp,
+             "updated_at": timestamp,
+         }
+
+         # Create a new entry using the UUID as the key
+         users_ref.child(entry_id).set(user_data)
+         print(f"Data for {name} successfully written to Firebase with ID: {entry_id}")
+         return entry_id
+
+     except Exception as e:
+         print(f"Error writing data to Firebase: {str(e)}")
+         raise
+
+
+ def read_all_users():
+     """
+     Reads all user data from Firebase database and returns it as a pandas DataFrame.
+
+     Returns:
+     :return pandas.DataFrame: DataFrame containing all user records with Firebase keys as index
+     """
+     try:
+         users = users_ref.get()
+         if not users:
+             print("No users found in the database.")
+             return pd.DataFrame()
+
+         # Convert Firebase data to DataFrame
+         df = pd.DataFrame.from_dict(users, orient="index")
+
+         # Reset index and rename it to 'firebase_key'
+         df = df.reset_index().rename(columns={"index": "firebase_key"})
+
+         # Reorder columns to put id and timestamps first
+         preferred_order = [
+             "firebase_key",
+             "id",
+             "created_at",
+             "updated_at",
+             "name",
+             "score",
+             "interview_question",
+             "job_title",
+             "job_requirements",
+             "feedback",
+         ]
+         actual_columns = [col for col in preferred_order if col in df.columns]
+         remaining_columns = [col for col in df.columns if col not in preferred_order]
+         df = df[actual_columns + remaining_columns]
+
+         # Convert timestamps to datetime
+         if "created_at" in df.columns:
+             df["created_at"] = pd.to_datetime(df["created_at"])
+         if "updated_at" in df.columns:
+             df["updated_at"] = pd.to_datetime(df["updated_at"])
+
+         # flatten the List[str] feedback into a single str
+         df["feedback"] = df["feedback"].apply(lambda x: " ".join(x))
+
+         df_filtered = df[
+             ["name", "job_title", "interview_question", "score", "feedback"]
+         ]
+
+         return df_filtered
+
+     except Exception as e:
+         print(f"Error reading data from Firebase: {str(e)}")
+         raise
+
+
+ def update_user_data(uuid, update_dict):
+     """
+     Updates existing user data in Firebase database.
+
+     Args:
+     :param update_dict: Dictionary containing fields to update
+     :param uuid: UUID of the record to update
+
+     Returns:
+     :return bool: True if update successful, False otherwise
+     """
+     try:
+         # Get current data
+         current_data = users_ref.child(uuid).get()
+
+         if not current_data:
+             print(f"No record found with UUID: {uuid}")
+             return False
+
+         # Update the timestamp
+         update_dict["updated_at"] = datetime.now().isoformat()
+
+         # Update only the specified fields
+         users_ref.child(uuid).update(update_dict)
+         print(f"Successfully updated record with UUID: {uuid}")
+         return True
+
+     except Exception as e:
+         print(f"Error updating data in Firebase: {str(e)}")
+         raise
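
Note that firebase.py initializes the Firebase app at import time, so importing any of these helpers already requires FIREBASE_API_KEY to hold the full service-account JSON. A minimal round-trip sketch, assuming a reachable Realtime Database and hypothetical candidate data:

from src.configs.database.firebase import (
    write_user_data, read_all_users, update_user_data
)

# write_user_data returns the generated UUID v7 record key
entry_id = write_user_data(
    name="Jane Doe",  # hypothetical candidate
    score=72,
    interview_question="Describe a fine-tuning project.",
    job_title="LLM Engineer",
    job_requirements="3+ years of NLP experience.",
    feedback=["Strong fundamentals.", "Limited MLOps exposure."],
)

# Partial update by key; only the given fields (plus updated_at) change
update_user_data(entry_id, {"score": 75})

# Returns the filtered DataFrame the Gradio candidates table displays
print(read_all_users())
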
src/configs/llm/nvidia-llama-3.1-nemotron-70b-instruct.yaml ADDED
@@ -0,0 +1,4 @@
+ PROVIDER: nvidia
+ BASE_URL: https://integrate.api.nvidia.com/v1
+ MODEL: nvidia/llama-3.1-nemotron-70b-instruct
+ TEMPERATURE: 0
src/configs/llm/openai-gpt-3.5-turbo.yaml ADDED
@@ -0,0 +1,4 @@
+ PROVIDER: openai
+ BASE_URL: default
+ MODEL: gpt-3.5-turbo
+ TEMPERATURE: 0
src/configs/llm/openai-gpt-4o-mini.yaml ADDED
@@ -0,0 +1,4 @@
+ PROVIDER: openai
+ BASE_URL: default
+ MODEL: gpt-4o-mini
+ TEMPERATURE: 0
src/configs/parser/llamaparse_en.yaml ADDED
@@ -0,0 +1,7 @@
+ LANGUAGE: en
+ DISABLE_OCR: false
+ PAGE_ROC_BBOX:
+   TOP: 0
+   RIGHT: 0
+   BOTTOM: 0
+   LEFT: 0
src/domain/candidate.py ADDED
@@ -0,0 +1,13 @@
+ from dataclasses import dataclass
+ from typing import Dict, List
+
+
+ @dataclass
+ class Candidate:
+     id: str
+     name: str
+     email: str
+     resume_data: Dict
+     interview_responses: List[str]
+     emotional_metrics: Dict
+     feedback: Dict
Binary file (1.17 kB).
 
src/domain/enums/emotion_types.py ADDED
@@ -0,0 +1,21 @@
+ from enum import Enum
+
+
+ class EmotionType(Enum):
+
+     SAD = "sad"
+     FEAR = "fear"
+     ANGRY = "angry"
+     DISGUST = "disgust"
+
+     HAPPY = "happy"
+     NEUTRAL = "neutral"
+     SURPRISE = "surprise"
+
+     @classmethod
+     def get_positive_emotions(cls):
+         return [cls.HAPPY, cls.NEUTRAL, cls.SURPRISE]
+
+     @classmethod
+     def get_negative_emotions(cls):
+         return [cls.SAD, cls.FEAR, cls.ANGRY, cls.DISGUST]
src/domain/enums/interview_status.py ADDED
@@ -0,0 +1,11 @@
+ from enum import Enum, auto
+
+
+ class InterviewStatus(Enum):
+     SCHEDULED = auto()
+     IN_PROGRESS = auto()
+     COMPLETED = auto()
+     CANCELLED = auto()
+     PENDING_REVIEW = auto()
+     REVIEWED = auto()
+     FAILED = auto()
src/domain/interview.py ADDED
@@ -0,0 +1,28 @@
+ from dataclasses import dataclass
+ from datetime import datetime
+ from typing import List, Dict
+ from src.domain.enums.interview_status import InterviewStatus
+ from src.domain.enums.emotion_types import EmotionType
+
+
+ @dataclass
+ class Interview:
+     id: str
+     candidate_id: str
+     job_id: str
+     video_path: str
+     status: InterviewStatus
+     questions: List[str]
+     responses_transcription: List[str]
+     timestamp: datetime
+     duration: int
+     emotional_analysis: Dict[EmotionType, float]
+
+     def is_completed(self) -> bool:
+         return self.status == InterviewStatus.COMPLETED
+
+     def is_reviewable(self) -> bool:
+         return self.status in [
+             InterviewStatus.COMPLETED,
+             InterviewStatus.PENDING_REVIEW,
+         ]
src/llm/__pycache__/base_llm_provider.cpython-312.pyc ADDED
Binary file (996 Bytes).
 
src/llm/__pycache__/enums.cpython-312.pyc ADDED
Binary file (274 Bytes).
 
src/llm/__pycache__/llm.cpython-312.pyc ADDED
Binary file (1.25 kB).
 
src/llm/__pycache__/nvidia_llm.cpython-312.pyc ADDED
Binary file (1.4 kB).
 
src/llm/__pycache__/openai_llm.cpython-312.pyc ADDED
Binary file (1.34 kB).
 
src/llm/base_llm_provider.py ADDED
@@ -0,0 +1,16 @@
+ """Base class for LLM providers"""
+
+ from abc import abstractmethod
+ from typing import Dict, Optional
+
+
+ class BaseLLMProvider:
+     @abstractmethod
+     def __init__(self):
+         """LLM provider initialization"""
+         raise NotImplementedError
+
+     @abstractmethod
+     def complete(self, prompt: str = "") -> str:
+         """LLM chat completion implementation by each provider"""
+         raise NotImplementedError
src/llm/enums.py ADDED
@@ -0,0 +1,3 @@
+ OPENAI_LLM = "openai"
+ NVIDIA_LLM = "nvidia"
+ DEFAULT_LLM_API_BASE = "default"
src/llm/llm.py ADDED
@@ -0,0 +1,32 @@
+ import yaml
+
+ from src.llm.enums import OPENAI_LLM, NVIDIA_LLM
+ from src.llm.base_llm_provider import BaseLLMProvider
+ from src.llm.openai_llm import OpenAILLM
+ from src.llm.nvidia_llm import NvidiaLLM
+
+
+ def get_llm(config_file_path: str = "config.yaml") -> BaseLLMProvider:
+     """
+     Initiates LLM client from config file
+     """
+
+     # load config
+     with open(config_file_path, "r") as f:
+         config = yaml.safe_load(f)
+
+     # init & return llm
+     if config["PROVIDER"] == OPENAI_LLM:
+         return OpenAILLM(
+             model=config["MODEL"],
+             temperature=config["TEMPERATURE"],
+             base_url=config["BASE_URL"],
+         )
+     elif config["PROVIDER"] == NVIDIA_LLM:
+         return NvidiaLLM(
+             model=config["MODEL"],
+             temperature=config["TEMPERATURE"],
+             base_url=config["BASE_URL"],
+         )
+     else:
+         raise ValueError(f"Unsupported LLM provider: {config['PROVIDER']}")
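
get_llm is the only factory the rest of the code calls: it reads PROVIDER, MODEL, TEMPERATURE, and BASE_URL from one of the YAML configs above and dispatches to OpenAILLM or NvidiaLLM. A minimal usage sketch, assuming the matching API key (e.g., OPENAI_API_KEY) is available in .env:

from dotenv import load_dotenv
from src.llm.llm import get_llm

load_dotenv()  # the provider's API key is assumed to be set here

# Same config path main_test.py uses; swap in the NVIDIA YAML to switch providers
llm = get_llm("./src/configs/llm/openai-gpt-3.5-turbo.yaml")
print(llm.complete("Summarize this candidate's strengths in one sentence: ..."))
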
src/llm/nvidia_llm.py ADDED
@@ -0,0 +1,29 @@
+ """NVIDIA LLM Implementation"""
+
+ from llama_index.llms.nvidia import NVIDIA
+
+ from src.llm.base_llm_provider import BaseLLMProvider
+ from src.llm.enums import DEFAULT_LLM_API_BASE
+
+
+ class NvidiaLLM(BaseLLMProvider):
+     def __init__(
+         self,
+         model: str = "nvidia/llama-3.1-nemotron-70b-instruct",
+         temperature: float = 0.0,
+         base_url: str = "https://integrate.api.nvidia.com/v1",
+     ):
+         """Initiate NVIDIA client"""
+
+         if base_url == DEFAULT_LLM_API_BASE:
+             self._client = NVIDIA(
+                 model=model,
+                 temperature=temperature,
+             )
+         else:
+             self._client = NVIDIA(
+                 model=model, temperature=temperature, base_url=base_url
+             )
+
+     def complete(self, prompt: str = "") -> str:
+         return str(self._client.complete(prompt))
src/llm/openai_llm.py ADDED
@@ -0,0 +1,29 @@
+ """OpenAI LLM Implementation"""
+
+ from llama_index.llms.openai import OpenAI
+
+ from src.llm.base_llm_provider import BaseLLMProvider
+ from src.llm.enums import DEFAULT_LLM_API_BASE
+
+
+ class OpenAILLM(BaseLLMProvider):
+     def __init__(
+         self,
+         model: str = "gpt-4o-mini",
+         temperature: float = 0.0,
+         base_url: str = DEFAULT_LLM_API_BASE,
+     ):
+         """Initiate OpenAI client"""
+
+         if base_url == DEFAULT_LLM_API_BASE:
+             self._client = OpenAI(
+                 model=model,
+                 temperature=temperature,
+             )
+         else:
+             self._client = OpenAI(
+                 model=model, temperature=temperature, base_url=base_url
+             )
+
+     def complete(self, prompt: str = "") -> str:
+         return str(self._client.complete(prompt))
src/output/.DS_Store ADDED
Binary file (6.15 kB).
 
src/output/report.docx ADDED
Binary file (39.4 kB).
 
src/service/__pycache__/emotion_recognition.cpython-312.pyc ADDED
Binary file (7.31 kB).
 
src/service/__pycache__/resume_parser.cpython-312.pyc ADDED
Binary file (2.12 kB).
 
src/service/emotion_recognition.py ADDED
@@ -0,0 +1,136 @@
+ import numpy as np
+ from deepface import DeepFace
+
+ from src.domain.enums.emotion_types import EmotionType
+
+
+ class EmotionRecognition:
+     def __init__(self):
+         pass
+
+     @classmethod
+     def detect_face_emotions(cls, frames: list[np.ndarray] = None) -> list:
+         """
+         Performs facial emotion detection using the DeepFace model
+         """
+         emotions = []
+         for frame in frames:
+             frame_result = DeepFace.analyze(
+                 frame, actions=["emotion"], enforce_detection=False
+             )
+             emotions.append(frame_result)
+
+         return emotions
+
+     @classmethod
+     def process_emotions(cls, emotions: list) -> dict:
+         """
+         Processes the emotions by calculating the overall confidence score using a
+         custom weighted emotion balancing algorithm.
+
+         Returns:
+         - weighted normalized score
+         - signed, weighted normalized score
+         - confidence score
+         """
+
+         count = 0
+         emots = {
+             str(EmotionType.SAD.value): 0,
+             str(EmotionType.FEAR.value): 0,
+             str(EmotionType.ANGRY.value): 0,
+             str(EmotionType.DISGUST.value): 0,
+             str(EmotionType.HAPPY.value): 0,
+             str(EmotionType.NEUTRAL.value): 0,
+             str(EmotionType.SURPRISE.value): 0,
+         }
+
+         for frame_result in emotions:
+             if len(frame_result) > 0:
+                 emot = frame_result[0]["emotion"]
+                 emots[str(EmotionType.SAD.value)] = (
+                     emots.get(str(EmotionType.SAD.value), 0)
+                     + emot[str(EmotionType.SAD.value)]
+                 )
+                 emots[str(EmotionType.FEAR.value)] = (
+                     emots.get(str(EmotionType.FEAR.value), 0)
+                     + emot[str(EmotionType.FEAR.value)]
+                 )
+                 emots[str(EmotionType.ANGRY.value)] = (
+                     emots.get(str(EmotionType.ANGRY.value), 0)
+                     + emot[str(EmotionType.ANGRY.value)]
+                 )
+                 emots[str(EmotionType.DISGUST.value)] = (
+                     emots.get(str(EmotionType.DISGUST.value), 0)
+                     + emot[str(EmotionType.DISGUST.value)]
+                 )
+                 emots[str(EmotionType.HAPPY.value)] = (
+                     emots.get(str(EmotionType.HAPPY.value), 0)
+                     + emot[str(EmotionType.HAPPY.value)]
+                 )
+                 emots[str(EmotionType.NEUTRAL.value)] = (
+                     emots.get(str(EmotionType.NEUTRAL.value), 0)
+                     + emot[str(EmotionType.NEUTRAL.value)]
+                 )
+                 emots[str(EmotionType.SURPRISE.value)] = (
+                     emots.get(str(EmotionType.SURPRISE.value), 0)
+                     + emot[str(EmotionType.SURPRISE.value)]
+                 )
+                 count += 1
+
+         # prevent zero division
+         if count == 0:
+             count = 1
+
+         for i in list(emots.keys()):
+             emots[i] /= count * 100
+
+         # refactor according to custom weightage
+         sad_score = emots[str(EmotionType.SAD.value)] * 1.3
+         fear_score = emots[str(EmotionType.FEAR.value)] * 1.3
+         angry_score = emots[str(EmotionType.ANGRY.value)] * 1.3
+         disgust_score = emots[str(EmotionType.DISGUST.value)] * 10
+         happy_score = emots[str(EmotionType.HAPPY.value)] * 1.7
+         neutral_score = emots[str(EmotionType.NEUTRAL.value)] / 1.2
+         surprise_score = emots[str(EmotionType.SURPRISE.value)] * 1.4
+
+         score_list = [
+             sad_score,
+             angry_score,
+             surprise_score,
+             fear_score,
+             happy_score,
+             disgust_score,
+             neutral_score,
+         ]
+         normalized_scores = cls.__normalize_scores(score_list)
+         mean = np.mean(normalized_scores)
+
+         result_scores = [
+             (-sad_score),
+             (-angry_score),
+             surprise_score,
+             (-fear_score),
+             happy_score,
+             (-disgust_score),
+             neutral_score,
+         ]
+         normalized_result_scores = cls.__normalize_scores(result_scores)
+         result = np.mean(normalized_result_scores)
+
+         difference = abs((mean - result) / mean) * 100
+
+         # keep values in range of [0, 100]
+         difference = min(difference, 50)
+
+         if mean > result:
+             conf = 50 - difference
+         else:
+             conf = 50 + difference
+
+         return {"mean": mean, "result": result, "conf": conf}
+
+     @classmethod
+     def __normalize_scores(cls, scores: list) -> list:
+         min_val, max_val = min(scores), max(scores)
+         return [(score - min_val) / (max_val - min_val) for score in scores]
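
The confidence score above is easiest to follow on concrete numbers: per-emotion percentages are averaged over frames, re-weighted, min-max normalized twice (once unsigned, once with negative emotions sign-flipped), and the relative gap between the two means is mapped into [0, 100] around a midpoint of 50. A self-contained sketch with one synthetic frame in the list-of-face-dicts shape that DeepFace.analyze() returns:

from src.service.emotion_recognition import EmotionRecognition

# One synthetic "frame": a list of detected-face dicts, each carrying
# per-emotion percentages that sum to roughly 100.
fake_frame = [{
    "emotion": {
        "sad": 5.0, "fear": 2.0, "angry": 1.0, "disgust": 0.5,
        "happy": 55.0, "neutral": 33.5, "surprise": 3.0,
    }
}]

scores = EmotionRecognition.process_emotions([fake_frame])
# Positive emotions dominate here, so the signed mean ("result") exceeds the
# unsigned mean ("mean") and conf lands above the neutral midpoint of 50
# (roughly 59 for these numbers).
print(scores["conf"])
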
src/service/resume_parser.py ADDED
@@ -0,0 +1,42 @@
+ import yaml
+ from llama_parse import LlamaParse
+ from llama_index.core import SimpleDirectoryReader
+
+ from src.template.parser_prompt import PARSE_RESUME_PROMPT
+
+
+ class ResumeParser:
+     def __init__(self, config_file_path: str = "config.yaml"):
+         """
+         Initiates a resume parser client
+         """
+
+         # load config
+         with open(config_file_path, "r") as f:
+             config = yaml.safe_load(f)
+
+         # set bbox size
+         bbox_margin = config["PAGE_ROC_BBOX"]
+         bbox = f"{bbox_margin['TOP']},{bbox_margin['RIGHT']},{bbox_margin['BOTTOM']},{bbox_margin['LEFT']}"
+
+         self._parser = LlamaParse(
+             language=config["LANGUAGE"],
+             disable_ocr=config["DISABLE_OCR"],
+             bounding_box=bbox,
+             result_type="markdown",
+             parsing_instruction=PARSE_RESUME_PROMPT,
+             is_formatting_instruction=False,
+         )
+
+     def parse_resume_to_markdown(self, resume_path: str = "") -> str:
+         """
+         Parses the resume into markdown text.
+
+         Supported filetypes:
+         - .pdf
+         """
+         document = SimpleDirectoryReader(
+             input_files=[resume_path], file_extractor={".pdf": self._parser}
+         ).load_data()
+
+         return "\n".join([str(d.text) for d in document])
src/template/__pycache__/grading_prompt.cpython-312.pyc ADDED
Binary file (4.54 kB).
 
src/template/__pycache__/parser_prompt.cpython-312.pyc ADDED
Binary file (1.59 kB).
 
src/template/grading_prompt.py ADDED
@@ -0,0 +1,111 @@
+ from llama_index.core.prompts import PromptTemplate
+
+ GRADE_RESPONSE_PROMPT = PromptTemplate(
+     """
+ You are a Human Resource Manager and an interviewer.
+ Your task is to review an interviewee's overall performance based on multiple factors.
+ You will be provided with the interview question, the interviewee's facial confidence score, their response to the question in text form, and additional context on the interview.
+
+ The confidence score will range from 0 to 100, and you will also receive the text of their answers to the interview question.
+ Based on this information, evaluate the interviewee’s performance in the following areas:
+
+ 1. **Answer Quality**:
+    Assess the clarity, relevance, and accuracy of their response to the interview question.
+    Did the interviewee address the key points effectively?
+
+ 2. **Problem-Solving Skills**:
+    Evaluate how well the interviewee tackled any problem presented in the interview question.
+    Were they able to think critically, analyze the situation, and propose solutions?
+
+ 3. **Confidence**:
+    Based on their facial confidence score (0 to 100) and their overall demeanor in the response, rate their confidence level and how it impacts their presentation and communication.
+
+ 4. **Personality**:
+    Consider the tone, communication style, and interpersonal skills of the interviewee.
+    How well did they engage with the question and the interview process?
+    Do they demonstrate qualities like openness, empathy, or assertiveness?
+
+ 5. **Overall Performance**:
+    Based on the combination of the above factors, provide a holistic evaluation of their performance in the interview.
+    Offer feedback on strengths and areas for improvement.
+
+ Ensure that your feedback is clear and actionable, so other HR professionals reviewing the interview can easily assess the interviewee's suitability for the position.
+
+
+ ########################################
+ Interview Question:
+ {interview_question}
+
+ ########################################
+ Interviewee's Facial Confidence Score:
+ {conf_score}
+
+ ########################################
+ Interviewee's response in text:
+ {response_text}
+
+ ########################################
+ output:
+ """
+ )
+
+
+ RANKING_AND_FEEDBACK_PROMPT = PromptTemplate(
+     """
+ You are an HR specialist evaluating an interviewee for a specific role.
+ Your task is to assess the suitability of the interviewee based on the following information:
+
+ 1. **Job Requirements**:
+    A list of skills, experiences, and qualifications required for the role.
+
+ 2. **Interview Feedback**:
+    The feedback and review of the interviewee’s performance in the interview, which includes assessments on their answer quality, problem-solving skills, confidence, personality, and overall performance.
+
+ 3. **Resume Text**:
+    A parsed version of the interviewee's resume, which includes their work experience, skills, education, and other relevant information.
+
+ Using these inputs, generate an output strictly in the following YAML format:
+
+ ###########################
+ name: <name>
+ score: <score>
+ feedback: <feedback text>
+ ###########################
+
+
+ Details for the output:
+ 1. **name**:
+    Name of the interviewee.
+
+ 2. **score**:
+    A score ranging from 0 to 100, where 0 means the interviewee is not recommended for the position, and 100 means they are a perfect match for the job.
+
+ 3. **feedback**:
+    - A detailed breakdown explaining how the interviewee’s experience, skills, and performance align or do not align with the job requirements.
+    - Discuss whether the interviewee’s skills, experiences, and overall traits match or fail to meet the required qualifications.
+    - Provide a short, concise sentence summarizing the interviewee’s suitability for the role.
+
+ Ensure that the feedback is comprehensive yet concise, offering actionable insights for HR professionals to make a decision about the interviewee’s fit for the role.
+
+
+ ########################################
+ Job Requirements:
+ {job_requirements}
+
+ ########################################
+ Interview Feedback:
+ {interview_feedback}
+
+ ########################################
+ Resume Text:
+ {resume_text}
+
+ ########################################
+
+ Output strictly following the below YAML format:
+
+ name: <name>
+ score: <score>
+ feedback: <feedback text>
+ """
+ )
src/template/parser_prompt.py ADDED
@@ -0,0 +1,21 @@
+ from llama_index.core.prompts import PromptTemplate
+
+ PARSE_RESUME_PROMPT = """
+ You are tasked with parsing a resume.
+
+ **Your Focus**:
+ - Reproduce only the main body text, including section headers and bullet points, exactly as received.
+ - Do not skip section numbers in the format DIGIT.DIGIT (e.g., 10.1, 3.1); you must apply a markdown header level based on the depth (e.g., # for main sections, ## for subsections) to reflect the appropriate hierarchy, and output them.
+ - Do make sure that section numbers are always followed by the corresponding section title without a '\n' character in between or separating them into different headers. Valid examples are as below:
+   - '# 14 Experience'
+   - '# 2 Education'
+   Invalid examples are as below:
+   - '# 14\n # Experience'
+   - '# 2\n # Education'
+ - You may only add markdown header symbols (#, ##, ###, etc.) to denote the hierarchical levels of section headers.
+ - Do not make up any text or headers that are not present in the original text.
+
+ **Expected Output**:
+ - Text, section headers, and bullet points must be reproduced without any text edits, additions, or deletions, other than adding markdown header symbols (#, ##, ###, etc.).
+ - Use markdown headers to denote additional hierarchy (e.g., # for main sections, ## for subsections) based on the best interpretation of the document’s structure.
+ """
src/utils/__pycache__/utils.cpython-312.pyc ADDED
Binary file (3.94 kB).
 
src/utils/utils.py ADDED
@@ -0,0 +1,103 @@
+ import cv2
+ import yaml
+ import numpy as np
+ from pathlib import Path
+ import speech_recognition as sr
+ from moviepy import VideoFileClip
+
+
+ def extract_audio(
+     input_video_file: str = "",
+     output_audio_file: str = "",
+ ) -> str:
+     """
+     Extracts audio from the input video file and saves it to the given path.
+     Returns the path to the saved audio file if extraction is successful.
+     Supported input video file formats are:
+     - .mp4
+     - .mov
+
+     Supported output audio file formats are:
+     - .wav
+     """
+     try:
+         input_video_file = str(Path(input_video_file))
+         output_audio_file = str(Path(output_audio_file))
+
+         # Load the video file
+         video = VideoFileClip(input_video_file)
+
+         # Extract audio and write to output file
+         video.audio.write_audiofile(output_audio_file)
+
+         print(f"[extract_audio()] : Audio extracted and saved to {output_audio_file}")
+
+         return output_audio_file
+     except Exception as e:
+         print(e)
+         return None
+
+
+ def audio2text(audio_file: str = "") -> str:
+     """
+     Converts audio to text using Google's speech-to-text engine (local client),
+     and returns the text.
+     """
+     r = sr.Recognizer()
+     with sr.AudioFile(audio_file) as source:
+         audio = r.record(source)
+     text = r.recognize_google(audio)
+     return text
+
+
+ def sample_frames(input_video_file: str = "", sample_rate: int = 2) -> list[np.ndarray]:
+     """
+     Samples one frame every 'sample_rate' frames from the video file and returns
+     them in the form of a list of Numpy ndarray objects.
+     """
+     cap = cv2.VideoCapture(input_video_file)
+     frames = []
+     count = 0
+
+     while cap.isOpened():
+         ret, frame = cap.read()
+         if not ret:
+             break
+         if count % sample_rate == 0:
+             frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+         count += 1
+     cap.release()
+
+     return frames
+
+
+ def parse_yaml_string(
+     yaml_string: str = "", expected_keys: list[str] = None, cleanup: bool = True
+ ) -> dict:
+     """
+     Parses a YAML string into a Python dictionary based on a list of
+     expected keys.
+     """
+
+     # removes ```YAML ``` heading and footers if present
+     if cleanup:
+         yaml_string = yaml_string.replace("YAML", "")
+         yaml_string = yaml_string.replace("yaml", "")
+         yaml_string = yaml_string.replace("`", "")
+
+     try:
+         parsed_data = yaml.safe_load(yaml_string)
+
+         # Handle missing keys with error handling
+         result = {}
+         for key in expected_keys:
+             if key in parsed_data:
+                 result[key] = parsed_data[key]
+             else:
+                 print(f"[parse_yaml_string()] : Missing key {key}")
+
+         return result
+
+     except KeyError as e:
+         print(e)
+         return None
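
parse_yaml_string is the glue between the free-form LLM output and the structured name/score/feedback record; the cleanup flag strips the ```yaml fences that models often wrap around their answers. A self-contained example (note the blunt replace() calls would also mangle any literal "yaml" inside field values, a known trade-off of this approach):

from src.utils.utils import parse_yaml_string

llm_output = """```yaml
name: Jane Doe
score: 75
feedback: Solid fundamentals, limited MLOps exposure.
```"""

parsed = parse_yaml_string(
    yaml_string=llm_output,
    expected_keys=["name", "score", "feedback"],
    cleanup=True,  # strips the fence characters before yaml.safe_load()
)
print(parsed)  # {'name': 'Jane Doe', 'score': 75, 'feedback': 'Solid fundamentals, limited MLOps exposure.'}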