File size: 5,165 Bytes
3b993c4
 
 
 
24d33b9
 
 
 
 
3b993c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e261f25
3b993c4
 
 
 
49b13c6
3b993c4
 
49b13c6
3b993c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49b13c6
3b993c4
 
 
 
 
49b13c6
3b993c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
from enum import Enum

from pydantic import BaseModel, Field

from vsp.app import bindings
from vsp.app.model.linkedin.linkedin_formatters import format_education, format_profile_as_resume
from vsp.app.model.linkedin.linkedin_models import Education, LinkedinProfile
from vsp.app.prompts.prompt_loader import PromptLoader
from vsp.llm.llm_service import LLMService


class SchoolType(Enum):
    """
    Enumeration of the school types an education item can be classified as.

    Each member's value is the human-readable label for that category. The
    member *names* (not these labels) are what `_SCHOOL_TYPE_MAPPING` is keyed
    by, so they are the identifiers matched against the language model output.
    """

    # Undergraduate study is split into two members by completion status.
    PRIMARY_SECONDARY = "Primary / Secondary School"
    UNDERGRAD_INCOMPLETE = "Undergraduate (Incomplete)"
    UNDERGRAD_COMPLETED = "Undergraduate (Completed)"
    # MBA, law school and PhD have dedicated members alongside the general
    # graduate-school category.
    MBA = "MBA"
    LAW_SCHOOL = "Law School"
    GRAD_SCHOOL = "Graduate School"
    PHD = "PhD"
    # Fallback category for anything not covered above.
    OTHER = "Other"


# Lookup table from a SchoolType member name (e.g. "LAW_SCHOOL") to the member
# itself. Built from the enum so the keys are exactly the member names, in
# declaration order.
_SCHOOL_TYPE_MAPPING: dict[str, SchoolType] = {member.name: member for member in SchoolType}


class EducationClassification(BaseModel):
    """
    Pydantic model representing the classification result for an education item.

    The model is configured as frozen (see ``model_config`` below), so field
    values cannot be reassigned once an instance has been created.

    Attributes:
        output (SchoolType): The classified school type.
        reasoning (str): Explanation for the classification decision.
    """

    output: SchoolType = Field(description="The classified school type")
    reasoning: str = Field(description="Explanation for the classification")
    # Frozen configuration: instances are immutable and, as a result, hashable.
    model_config = {"frozen": True}


class EducationClassifier:
    """
    A class for classifying education items from Linkedin profiles.

    This classifier uses a language model to determine the type of educational
    institution and program based on the information provided in a Linkedin profile.

    Attributes:
        _llm_service (LLMService): The language model service used for classification.
        _prompt_template (Any): The template for generating prompts for the language model.
        _prompt_loader (PromptLoader): The loader for prompt templates.
    """

    @staticmethod
    def _extract_fields(output: str) -> dict[str, str]:
        """
        Split raw ``key: value`` model output into a dictionary of fields.

        Every line containing a colon starts a field: the text before the first
        colon is the key and the remainder is the value (both stripped). If a
        key appears more than once, the last occurrence wins. Lines without a
        colon are tolerated rather than rejected: blank lines are skipped and
        other text is appended (on a new line) to the most recent field, so a
        value wrapped over several lines is kept together. Colon-less text that
        appears before any field is ignored.

        Args:
            output (str): The raw output string from the language model.

        Returns:
            dict[str, str]: Mapping of field name to field value.
        """
        fields: dict[str, str] = {}
        current_key: str | None = None

        # Split on "\n" only; a stray "\r" from CRLF output is removed by strip().
        for line in output.strip().split("\n"):
            key, separator, value = line.partition(":")
            if separator:
                current_key = key.strip()
                fields[current_key] = value.strip()
                continue

            continuation = line.strip()
            if continuation and current_key is not None:
                # Outer strip() avoids a leading newline when the field's
                # first line was empty (e.g. "reasoning:" followed by text).
                fields[current_key] = f"{fields[current_key]}\n{continuation}".strip()

        return fields

    @staticmethod
    def _parse_output(output: str) -> EducationClassification:
        """
        Parse the output from the language model into an EducationClassification object.

        The output is expected to contain an ``output`` field holding a
        `SchoolType` member name (matched case-insensitively) and a
        ``reasoning`` field. Any additional fields are ignored.

        Args:
            output (str): The raw output string from the language model.

        Returns:
            EducationClassification: A structured representation of the classification result.

        Raises:
            ValueError: If the ``output`` or ``reasoning`` field is missing, or if
                the output contains an unknown school type.
        """
        fields = EducationClassifier._extract_fields(output)

        if "output" not in fields:
            raise ValueError("Missing 'output' field in classifier response")

        raw_school_type = fields["output"]
        school_type = _SCHOOL_TYPE_MAPPING.get(raw_school_type.upper())
        if school_type is None:
            raise ValueError(f"Unknown school type: {raw_school_type}")

        if "reasoning" not in fields:
            raise ValueError("Missing 'reasoning' field in classifier response")

        return EducationClassification(output=school_type, reasoning=fields["reasoning"])

    def __init__(
        self, llm_service: LLMService = bindings.open_ai_service, prompt_loader: PromptLoader = bindings.prompt_loader
    ):
        """
        Initialize the EducationClassifier.

        The prompt template is loaded once here and reused for every
        classification request.

        Args:
            llm_service (LLMService, optional): The language model service to use.
                Defaults to the OpenAI service defined in bindings.
            prompt_loader (PromptLoader, optional): The prompt loader to use.
                Defaults to the prompt loader defined in bindings.
        """
        self._llm_service = llm_service
        self._prompt_template = prompt_loader.load_template("education_classifier/1 - education_classifier")
        self._prompt_loader = prompt_loader

    async def classify_education(
        self, linkedin_profile: LinkedinProfile, education: Education
    ) -> EducationClassification:
        """
        Classify a single education item from a Linkedin profile.

        This method builds a prompt from the stored template, supplying the
        profile formatted as a resume and the formatted education item, with
        `_parse_output` registered as the output formatter. It then awaits the
        prompt's evaluation and returns the result.

        Args:
            linkedin_profile (LinkedinProfile): The full Linkedin profile of the individual.
            education (Education): The specific education item to classify.

        Returns:
            EducationClassification: The classification result for the education item.

        Raises:
            ValueError: Presumably propagated from `_parse_output` when the model
                output cannot be parsed, or raised by the prompt evaluation itself
                when it produces no result -- TODO confirm against the prompt
                implementation.
        """
        prompt = self._prompt_loader.create_prompt(
            self._prompt_template,
            llm_service=self._llm_service,
            output_formatter=EducationClassifier._parse_output,
            resume=format_profile_as_resume(linkedin_profile),
            education=format_education(education),
        )
        return await prompt.evaluate()  # type: ignore