# Spaces: capitaletech / cv_quality (Sleeping)
# File size: 6,795 Bytes

import json
from typing import List
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate

def load_json_file(filename):
    """Load and parse a UTF-8 encoded JSON file.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON value, or ``None`` when the file cannot be opened,
        is not valid UTF-8, or does not contain valid JSON. The problem is
        reported on stdout in every failure case.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            return json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON in {filename}: {e}")
    except (OSError, UnicodeDecodeError) as e:
        # A missing/unreadable/mis-encoded file is reported the same way as
        # malformed JSON so that callers only have to handle ``None``.
        print(f"Error reading {filename}: {e}")
    return None

# Reference data loaded once, at import time, from paths relative to the
# current working directory. Either name is None whenever load_json_file
# returns None for its file.
# cv_structure is not referenced by any code visible in this file.
cv_structure = load_json_file('cv_structure.json')
# cv_sections is expected to hold a 'sections' mapping: its keys are the
# section names listed in the detection prompt, and its values are the amounts
# added up by calculate_section_detection_score.
cv_sections = load_json_file('cv_sections.json')

# Evaluation of a single education entry: presence flags for the degree, year
# and institution, plus a score for the entry constrained to 0-10.
# NOTE(review): documented with comments rather than a docstring on purpose -
# pydantic copies a model's docstring into its generated schema, which would
# likely change the format instructions built in get_content_quality_prompt.
class EducationElement(BaseModel):
    degree_present: bool = Field(description="Whether the degree is present")
    year_present: bool = Field(description="Whether the year is present")
    institution_present: bool = Field(description="Whether the institution is present")
    score: float = Field(description="Score for this education element", ge=0, le=10)

# Evaluation of the whole education section: a section-level score (0-10) and
# one EducationElement per evaluated entry.
# NOTE(review): kept as comments, not a docstring - pydantic would copy a
# docstring into the generated schema.
class Education(BaseModel):
    overall_score: float = Field(description="Overall score for the education section", ge=0, le=10)
    elements: List[EducationElement] = Field(description="List of education elements")

# Evaluation of a single work experience entry.
#   *_present fields  - whether each expected piece of information was found
#   *_quality fields  - 0-10 ratings of the responsibilities/achievements text
#   score             - 0-10 score for the entry as a whole
# NOTE(review): kept as comments, not a docstring - pydantic would copy a
# docstring into the generated schema.
class WorkExperienceElement(BaseModel):
    job_title_present: bool = Field(description="Whether the job title is present")
    company_present: bool = Field(description="Whether the company name is present")
    dates_present: bool = Field(description="Whether the start and end dates are present")
    technologies_present: bool = Field(description="Whether the used technologies are present")
    responsibilities_present: bool = Field(description="Whether responsibilities are present")
    achievements_present: bool = Field(description="Whether achievements are present")
    responsibilities_quality: float = Field(description="Quality of responsibilities description", ge=0, le=10)
    achievements_quality: float = Field(description="Quality of achievements description", ge=0, le=10)
    score: float = Field(description="Score for this work experience element", ge=0, le=10)

# Evaluation of the whole work experience section: a section-level score
# (0-10) and one WorkExperienceElement per evaluated entry.
# NOTE(review): kept as comments, not a docstring - pydantic would copy a
# docstring into the generated schema.
class WorkExperience(BaseModel):
    overall_score: float = Field(description="Overall score for the work experience section", ge=0, le=10)
    elements: List[WorkExperienceElement] = Field(description="List of work experience elements")

# Evaluation of the profile section: a section-level score (0-10) and presence
# flags for a brief overview, career goals and an objective. Unlike the other
# sections it has no per-entry element list.
# NOTE(review): kept as comments, not a docstring - pydantic would copy a
# docstring into the generated schema.
class Profile(BaseModel):
    overall_score: float = Field(description="Overall score for the profile section", ge=0, le=10)
    brief_overview_present: bool = Field(description="Whether a brief overview is present")
    career_goals_present: bool = Field(description="Whether career goals are present")
    objective_present: bool = Field(description="Whether an objective is present")

# Top-level result model: groups the education, work experience and profile
# evaluations. It is the model handed to PydanticOutputParser in
# get_content_quality_prompt and the input of calculate_overall_score.
# NOTE(review): kept as comments, not a docstring - pydantic would copy a
# docstring into the generated schema and so, presumably, into the prompt's
# format instructions.
class ResumeQualityEvaluation(BaseModel):
    education: Education = Field(description="Evaluation of the education section")
    work_experience: WorkExperience = Field(description="Evaluation of the work experience section")
    profile: Profile = Field(description="Evaluation of the profile section")

def get_personal_info_prompt(text):
    """Build the instruction prompt for extracting city and country from a CV.

    Args:
        text: CV text to embed in the prompt.

    Returns:
        The prompt string, wrapped in ``<s>[INST]`` ... ``[/INST]`` markers,
        asking for a JSON object with "city" and "country" entries.
    """
    # Literal JSON shape shown to the model (plain string, so braces are not escaped).
    json_shape = (
        '{"city": {"extracted city name": true/false}, '
        '"country": {"extracted country name": true/false}}'
    )
    instruction = (
        "Extract the personal information from the following CV text. "
        "The text may be in any language. "
        "Respond with a JSON object in the format " + json_shape + ". "
        "If you can't find the information, set the value to false."
    )
    pieces = ("<s>[INST]", instruction, "\n\nText:\n", format(text), "[/INST]")
    return "".join(pieces)

def get_spelling_grammar_prompt(text):
    """Build the instruction prompt for estimating spelling/grammar error rate.

    Args:
        text: Text to embed in the prompt for analysis.

    Returns:
        The prompt string, wrapped in ``<s>[INST]`` ... ``[/INST]`` markers,
        asking for a JSON object with an 'error_percentage' key.
    """
    opening = (
        "<s>[INST]Analyze the following text for spelling and grammar errors. "
        "The text may be in any language. "
        "Do not correct the errors, just count them. "
        "Calculate the percentage of errors."
    )
    payload = f"Text to analyze:\n{text}"
    closing = (
        "Respond with a JSON object containing the key 'error_percentage' "
        "with the calculated percentage (0-100) of errors.[/INST]"
    )
    # The three parts are separated by one blank line each.
    return "\n\n".join((opening, payload, closing))

def get_section_detection_prompt(text):
    """Build the instruction prompt for detecting which CV sections are present.

    The candidate section names are the keys of ``cv_sections['sections']``.

    Args:
        text: CV text to embed in the prompt.

    Returns:
        The prompt string, or ``None`` when ``cv_sections`` is ``None``.
    """
    if cv_sections is None:
        return None

    names = ", ".join(cv_sections['sections'].keys())
    # Some lines deliberately end with a space: the prompt text is kept
    # character-for-character as it has always been sent.
    prompt_lines = [
        f"<s>[INST] Analyze this CV text and identify which of the following sections are present: {names}. ",
        "A section is considered present if its content is identifiable, even without an explicit title. ",
        "Consider synonyms and alternative phrasings for section titles.",
        "",
        "Sections to look for:",
        names,
        "",
        "CV text:",
        f"{text}",
        "",
        'Respond with a JSON object with a key "present_sections" containing an array of the identified sections. ',
        "Only include sections that are actually present in the CV. [/INST]",
    ]
    return "\n".join(prompt_lines)

def get_content_quality_prompt(text):
    """Build the instruction prompt for scoring resume content quality.

    The prompt embeds the resume text together with the format instructions
    that ``PydanticOutputParser`` produces for ``ResumeQualityEvaluation``.

    Args:
        text: Resume text substituted for the ``{resume}`` placeholder.

    Returns:
        The fully formatted prompt string.
    """
    output_parser = PydanticOutputParser(pydantic_object=ResumeQualityEvaluation)
    format_instructions = output_parser.get_format_instructions()

    template_text = """<s>[INST]Evaluate the quality of the following resume sections:

{resume}

Provide a detailed evaluation following this format:
{format_instructions}

For each section, evaluate the presence and quality of required elements:

1. Education:
   - Check for the presence of Degree, Year, and Institution for each education entry
   - Provide a score (0-10) for each education entry based on completeness and clarity

2. Work Experience:
   - Check for the presence of Job title, Company, dates, used technologies, Responsibilities, and Achievements for each work experience entry
   - Evaluate the quality of Responsibilities description (0-10)
   - Evaluate the quality of Achievements description (0-10)
   - Provide a score (0-10) for each work experience entry based on completeness, clarity, and the quality of descriptions

3. Profile:
   - Check for the presence of a brief overview, career goals, and objective
   - Provide an overall score (0-10) based on the completeness and clarity of the profile

Provide an overall score for each section on a scale of 0-10 based on the presence of elements and their quality where applicable.[/INST]"""

    # Only {resume} is supplied at format time; the format instructions are
    # bound up front as a partial variable.
    quality_prompt = PromptTemplate(
        template=template_text,
        input_variables=["resume"],
        partial_variables={"format_instructions": format_instructions},
    )
    return quality_prompt.format(resume=text)

def calculate_section_detection_score(detected_sections):
    """Sum the configured values of the detected CV sections.

    Args:
        detected_sections: Iterable of section names. Names that are not keys
            of ``cv_sections['sections']`` are ignored. ``None`` is treated
            as "nothing detected".

    Returns:
        The sum of ``cv_sections['sections'][name]`` over the recognised
        names, or 0 when the section configuration is unavailable or nothing
        was detected.
    """
    # cv_sections is None when its JSON file could not be loaded; degrade to a
    # zero score instead of raising TypeError, mirroring the None guard in
    # get_section_detection_prompt.
    if cv_sections is None or detected_sections is None:
        return 0

    section_values = cv_sections['sections']
    total_score = 0
    for section in detected_sections:
        # Keys loaded from JSON are always strings, so a non-string entry can
        # never match; skipping it also avoids a TypeError on unhashable
        # entries such as lists or dicts.
        if isinstance(section, str) and section in section_values:
            total_score += section_values[section]
    return total_score

def calculate_overall_score(evaluation: ResumeQualityEvaluation) -> float:
    """Combine the three section scores into a single weighted score.

    The weights are 0.3 for education, 0.5 for work experience and 0.2 for
    the profile.

    Args:
        evaluation: Object exposing ``education``, ``work_experience`` and
            ``profile``, each with an ``overall_score``.

    Returns:
        The weighted sum of the section scores, rounded to two decimals.
    """
    education_part = evaluation.education.overall_score * 0.3
    work_part = evaluation.work_experience.overall_score * 0.5
    profile_part = evaluation.profile.overall_score * 0.2
    return round(education_part + work_part + profile_part, 2)


# Names exported by `from <module> import *`. load_json_file, the module-level
# cv_structure / cv_sections data and the nested evaluation models are not
# listed here.
__all__ = ['ResumeQualityEvaluation', 'get_personal_info_prompt', 'get_spelling_grammar_prompt', 
           'get_section_detection_prompt', 'get_content_quality_prompt', 
           'calculate_section_detection_score', 'calculate_overall_score']