Spaces:
Sleeping
Sleeping
import json | |
from typing import List | |
from langchain_core.pydantic_v1 import BaseModel, Field | |
from langchain.output_parsers import PydanticOutputParser | |
from langchain_core.prompts import PromptTemplate | |
def load_json_file(filename):
    """Load and parse a UTF-8 encoded JSON file.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON value, or None when the file cannot be read or
        does not contain valid JSON. The reason is printed to stdout.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            return json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON in {filename}: {e}")
    except (OSError, UnicodeDecodeError) as e:
        # A missing, unreadable or wrongly encoded file is reported the same
        # way as malformed JSON, so callers get None instead of an exception.
        print(f"Error reading {filename}: {e}")
    return None
# Module-level configuration, loaded once at import time. Either name is
# bound to None when load_json_file returns None for its file.
cv_structure, cv_sections = (
    load_json_file(config_name)
    for config_name in ('cv_structure.json', 'cv_sections.json')
)
class EducationElement(BaseModel):
    # Evaluation of a single education entry: one presence flag per expected
    # detail plus a score constrained to the 0-10 range.
    degree_present: bool = Field(
        description="Whether the degree is present",
    )
    year_present: bool = Field(
        description="Whether the year is present",
    )
    institution_present: bool = Field(
        description="Whether the institution is present",
    )
    score: float = Field(
        description="Score for this education element",
        ge=0,
        le=10,
    )
class Education(BaseModel):
    # Evaluation of the whole education section: a 0-10 section score and the
    # per-entry evaluations.
    overall_score: float = Field(
        description="Overall score for the education section",
        ge=0,
        le=10,
    )
    elements: List[EducationElement] = Field(
        description="List of education elements",
    )
class WorkExperienceElement(BaseModel):
    # Evaluation of a single work experience entry.

    # Presence flags, one per expected detail of the entry.
    job_title_present: bool = Field(
        description="Whether the job title is present",
    )
    company_present: bool = Field(
        description="Whether the company name is present",
    )
    dates_present: bool = Field(
        description="Whether the start and end dates are present",
    )
    technologies_present: bool = Field(
        description="Whether the used technologies are present",
    )
    responsibilities_present: bool = Field(
        description="Whether responsibilities are present",
    )
    achievements_present: bool = Field(
        description="Whether achievements are present",
    )

    # Quality ratings and the entry score, each constrained to 0-10.
    responsibilities_quality: float = Field(
        description="Quality of responsibilities description",
        ge=0,
        le=10,
    )
    achievements_quality: float = Field(
        description="Quality of achievements description",
        ge=0,
        le=10,
    )
    score: float = Field(
        description="Score for this work experience element",
        ge=0,
        le=10,
    )
class WorkExperience(BaseModel):
    # Evaluation of the whole work experience section: a 0-10 section score
    # and the per-entry evaluations.
    overall_score: float = Field(
        description="Overall score for the work experience section",
        ge=0,
        le=10,
    )
    elements: List[WorkExperienceElement] = Field(
        description="List of work experience elements",
    )
class Profile(BaseModel):
    # Evaluation of the profile section: a 0-10 section score followed by one
    # presence flag per expected component.
    overall_score: float = Field(
        description="Overall score for the profile section",
        ge=0,
        le=10,
    )
    brief_overview_present: bool = Field(
        description="Whether a brief overview is present",
    )
    career_goals_present: bool = Field(
        description="Whether career goals are present",
    )
    objective_present: bool = Field(
        description="Whether an objective is present",
    )
class ResumeQualityEvaluation(BaseModel):
    # Top-level evaluation result grouping the three section evaluations.
    # Documented with comments rather than a docstring: pydantic would copy a
    # class docstring into the generated schema as its description.
    education: Education = Field(
        description="Evaluation of the education section",
    )
    work_experience: WorkExperience = Field(
        description="Evaluation of the work experience section",
    )
    profile: Profile = Field(
        description="Evaluation of the profile section",
    )
def get_personal_info_prompt(text):
    """Build the prompt asking for the city and country found in a CV.

    Args:
        text: CV text, interpolated into the prompt unchanged.

    Returns:
        The prompt string, wrapped in ``<s>[INST]`` / ``[/INST]`` markers,
        requesting a JSON object with "city" and "country" entries.
    """
    personal_info_prompt = f"""<s>[INST]Extract the personal information from the following CV text. The text may be in any language. Respond with a JSON object in the format {{"city": {{"extracted city name": true/false}}, "country": {{"extracted country name": true/false}}}}. If you can't find the information, set the value to false.
Text:
{text}[/INST]"""
    return personal_info_prompt
def get_spelling_grammar_prompt(text):
    """Build the prompt asking for the spelling/grammar error percentage.

    Args:
        text: Text to analyze, interpolated into the prompt unchanged.

    Returns:
        The prompt string, wrapped in ``<s>[INST]`` / ``[/INST]`` markers,
        requesting a JSON object with the key 'error_percentage'.
    """
    spelling_prompt = f"""<s>[INST]Analyze the following text for spelling and grammar errors. The text may be in any language. Do not correct the errors, just count them. Calculate the percentage of errors.
Text to analyze:
{text}
Respond with a JSON object containing the key 'error_percentage' with the calculated percentage (0-100) of errors.[/INST]"""
    return spelling_prompt
def get_section_detection_prompt(text):
    """Build the prompt asking which configured sections a CV contains.

    The section names are the keys of ``cv_sections['sections']``, joined
    with ", ".

    Args:
        text: CV text, interpolated into the prompt unchanged.

    Returns:
        The prompt string, or None when the module-level ``cv_sections``
        configuration is None.
    """
    if cv_sections is None:
        return None

    # Iterating the mapping yields its keys, i.e. the section names.
    section_names = ", ".join(cv_sections['sections'])
    detection_prompt = f"""<s>[INST] Analyze this CV text and identify which of the following sections are present: {section_names}.
A section is considered present if its content is identifiable, even without an explicit title.
Consider synonyms and alternative phrasings for section titles.
Sections to look for:
{section_names}
CV text:
{text}
Respond with a JSON object with a key "present_sections" containing an array of the identified sections.
Only include sections that are actually present in the CV. [/INST]"""
    return detection_prompt
def get_content_quality_prompt(text):
    """Build the prompt asking for a structured quality evaluation of a resume.

    The format instructions embedded in the prompt come from a
    PydanticOutputParser created for ResumeQualityEvaluation.

    Args:
        text: Resume text substituted for the ``{resume}`` placeholder.

    Returns:
        The formatted prompt string.
    """
    output_parser = PydanticOutputParser(pydantic_object=ResumeQualityEvaluation)
    format_instructions = output_parser.get_format_instructions()

    # Plain (non f-string) template: the placeholders are filled in by
    # PromptTemplate, not by Python string interpolation.
    template_text = """<s>[INST]Evaluate the quality of the following resume sections:
{resume}
Provide a detailed evaluation following this format:
{format_instructions}
For each section, evaluate the presence and quality of required elements:
1. Education:
- Check for the presence of Degree, Year, and Institution for each education entry
- Provide a score (0-10) for each education entry based on completeness and clarity
2. Work Experience:
- Check for the presence of Job title, Company, dates, used technologies, Responsibilities, and Achievements for each work experience entry
- Evaluate the quality of Responsibilities description (0-10)
- Evaluate the quality of Achievements description (0-10)
- Provide a score (0-10) for each work experience entry based on completeness, clarity, and the quality of descriptions
3. Profile:
- Check for the presence of a brief overview, career goals, and objective
- Provide an overall score (0-10) based on the completeness and clarity of the profile
Provide an overall score for each section on a scale of 0-10 based on the presence of elements and their quality where applicable.[/INST]"""

    quality_prompt = PromptTemplate(
        template=template_text,
        input_variables=["resume"],
        partial_variables={"format_instructions": format_instructions},
    )
    return quality_prompt.format(resume=text)
def calculate_section_detection_score(detected_sections):
    """Sum the configured scores of the detected CV sections.

    Args:
        detected_sections: Iterable of section names. Names that are not keys
            of ``cv_sections['sections']`` are ignored; a name that occurs
            more than once is counted each time.

    Returns:
        The total of the matching section scores, or 0 when nothing matches
        or the module-level ``cv_sections`` configuration is None.
    """
    # Same guard as get_section_detection_prompt: without the configuration
    # there is nothing to score, so return 0 instead of raising TypeError.
    if cv_sections is None:
        return 0

    section_scores = cv_sections['sections']
    return sum(
        section_scores[section]
        for section in detected_sections
        if section in section_scores
    )
def calculate_overall_score(evaluation: ResumeQualityEvaluation) -> float:
    """Combine the three section scores into one weighted score.

    Education counts for 0.3, work experience for 0.5 and profile for 0.2.

    Args:
        evaluation: Evaluation whose ``education``, ``work_experience`` and
            ``profile`` attributes each expose an ``overall_score``.

    Returns:
        The weighted sum, rounded to two decimal places.
    """
    weighted_education = evaluation.education.overall_score * 0.3
    weighted_work_experience = evaluation.work_experience.overall_score * 0.5
    weighted_profile = evaluation.profile.overall_score * 0.2

    # Added left to right in this fixed order so the floating-point result
    # does not depend on summation order.
    combined = weighted_education + weighted_work_experience + weighted_profile
    return round(combined, 2)
# Names exported by a star-import of this module.
__all__ = [
    'ResumeQualityEvaluation',
    'get_personal_info_prompt',
    'get_spelling_grammar_prompt',
    'get_section_detection_prompt',
    'get_content_quality_prompt',
    'calculate_section_detection_score',
    'calculate_overall_score',
]