File size: 5,165 Bytes
3b993c4 24d33b9 3b993c4 e261f25 3b993c4 49b13c6 3b993c4 49b13c6 3b993c4 49b13c6 3b993c4 49b13c6 3b993c4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
from enum import Enum
from pydantic import BaseModel, Field
from vsp.app import bindings
from vsp.app.model.linkedin.linkedin_formatters import format_education, format_profile_as_resume
from vsp.app.model.linkedin.linkedin_models import Education, LinkedinProfile
from vsp.app.prompts.prompt_loader import PromptLoader
from vsp.llm.llm_service import LLMService
class SchoolType(Enum):
    """
    Categories an education entry can be classified into.

    Each member's value is the human-readable label for that category.
    Members are declared from the earliest schooling level through the
    post-graduate categories, with ``OTHER`` as the final catch-all.
    """

    # Pre-university schooling.
    PRIMARY_SECONDARY = "Primary / Secondary School"

    # Undergraduate study, split by whether it was finished.
    UNDERGRAD_INCOMPLETE = "Undergraduate (Incomplete)"
    UNDERGRAD_COMPLETED = "Undergraduate (Completed)"

    # Post-graduate categories.
    MBA = "MBA"
    LAW_SCHOOL = "Law School"
    GRAD_SCHOOL = "Graduate School"
    PHD = "PhD"

    # Anything that fits none of the categories above.
    OTHER = "Other"
# Lookup table from enum member name (e.g. "LAW_SCHOOL") to the member itself.
# It contains every SchoolType member, in declaration order.
_SCHOOL_TYPE_MAPPING: dict[str, SchoolType] = {member.name: member for member in SchoolType}
class EducationClassification(BaseModel):
    """
    Result of classifying a single education item.

    Attributes:
        output (SchoolType): The school type assigned to the education item.
        reasoning (str): Free-text explanation accompanying the classification.
    """

    # Frozen: instances reject attribute assignment and are hashable.
    model_config = {"frozen": True}

    output: SchoolType = Field(description="The classified school type")
    reasoning: str = Field(description="Explanation for the classification")
class EducationClassifier:
    """
    Classifies education items from Linkedin profiles.

    The classifier renders a prompt from a template, hands it to the configured
    LLM service, and parses the textual answer into an EducationClassification.

    Attributes:
        _llm_service (LLMService): The language model service used for classification.
        _prompt_template (Any): The template loaded from
            "education_classifier/1 - education_classifier".
        _prompt_loader (PromptLoader): The loader used to load the template and
            to build prompts from it.
    """

    @staticmethod
    def _parse_output(output: str) -> EducationClassification:
        """
        Parse the raw language model output into an EducationClassification.

        The expected format is a sequence of ``key: value`` lines containing at
        least an ``output`` field (a SchoolType member name, matched
        case-insensitively) and a ``reasoning`` field. Blank lines are ignored,
        and a line without a colon is treated as a continuation of the previous
        field, so a reasoning that wraps over several lines is accepted.

        Args:
            output (str): The raw output string from the language model.

        Returns:
            EducationClassification: A structured representation of the classification result.

        Raises:
            ValueError: If the output is empty, starts with a line that has no
                ``key:`` prefix, or names an unknown school type.
            KeyError: If the ``output`` or ``reasoning`` field is missing.
        """
        fields: dict[str, str] = {}
        current_key: str | None = None

        for raw_line in output.strip().splitlines():
            line = raw_line.strip()
            if not line:
                # Blank separator lines between fields carry no information.
                continue

            key, separator, value = line.partition(":")
            if separator:
                # A "key: value" line starts a new field; a repeated key overrides the earlier one.
                current_key = key.strip()
                fields[current_key] = value.strip()
            elif current_key is not None:
                # No colon: the previous field's value wrapped onto this line.
                fields[current_key] = f"{fields[current_key]}\n{line}".strip()
            else:
                raise ValueError(f"Malformed classifier output line: {line!r}")

        if not fields:
            raise ValueError("Classifier output is empty")

        # NOTE(review): a missing field surfaces as KeyError, as it always has.
        # Consider raising ValueError instead once callers are checked.
        raw_school_type = fields["output"]
        school_type = _SCHOOL_TYPE_MAPPING.get(raw_school_type.upper())
        if school_type is None:
            raise ValueError(f"Unknown school type: {raw_school_type}")

        return EducationClassification(
            output=school_type,
            reasoning=fields["reasoning"],
        )

    def __init__(
        self, llm_service: LLMService = bindings.open_ai_service, prompt_loader: PromptLoader = bindings.prompt_loader
    ):
        """
        Initialize the EducationClassifier.

        The prompt template is loaded once here and reused for every
        classification.

        Args:
            llm_service (LLMService, optional): The language model service to use.
                Defaults to ``bindings.open_ai_service``.
            prompt_loader (PromptLoader, optional): The prompt loader to use.
                Defaults to ``bindings.prompt_loader``.
        """
        self._llm_service = llm_service
        self._prompt_template = prompt_loader.load_template("education_classifier/1 - education_classifier")
        self._prompt_loader = prompt_loader

    async def classify_education(
        self, linkedin_profile: LinkedinProfile, education: Education
    ) -> EducationClassification:
        """
        Classify a single education item from a Linkedin profile.

        The whole profile (formatted as a resume) and the formatted education
        item are passed to the prompt template; ``_parse_output`` is registered
        as the prompt's output formatter.

        Args:
            linkedin_profile (LinkedinProfile): The full Linkedin profile of the individual.
            education (Education): The specific education item to classify.

        Returns:
            EducationClassification: The classification result for the education item.

        Raises:
            ValueError: NOTE(review): the previous docstring said this is raised
                when prompt evaluation fails to produce a result. That happens
                inside the prompt object and is not visible here; confirm
                against the prompt implementation.
        """
        prompt = self._prompt_loader.create_prompt(
            self._prompt_template,
            llm_service=self._llm_service,
            output_formatter=EducationClassifier._parse_output,
            resume=format_profile_as_resume(linkedin_profile),
            education=format_education(education),
        )
        # The prompt's evaluate() return type is not narrowed to EducationClassification.
        return await prompt.evaluate()  # type: ignore
|