Spaces:

pquiggles
/

vsp-demo

Runtime error

File size: 5,165 Bytes

from enum import Enum

from pydantic import BaseModel, Field

from vsp.app import bindings
from vsp.app.model.linkedin.linkedin_formatters import format_education, format_profile_as_resume
from vsp.app.model.linkedin.linkedin_models import Education, LinkedinProfile
from vsp.app.prompts.prompt_loader import PromptLoader
from vsp.llm.llm_service import LLMService


class SchoolType(Enum):
    """
    Enumeration of different school types for education classification.

    This enum represents various levels and types of educational institutions.
    Each member's value is a display-style label; the member *names* are the
    identifiers that `_SCHOOL_TYPE_MAPPING` uses as its keys.
    """

    PRIMARY_SECONDARY = "Primary / Secondary School"
    # Undergraduate study is tracked separately by completion status.
    UNDERGRAD_INCOMPLETE = "Undergraduate (Incomplete)"
    UNDERGRAD_COMPLETED = "Undergraduate (Completed)"
    # MBA, law school and PhD each have their own member, separate from GRAD_SCHOOL.
    MBA = "MBA"
    LAW_SCHOOL = "Law School"
    GRAD_SCHOOL = "Graduate School"
    PHD = "PhD"
    # Catch-all member for anything not covered by the categories above.
    OTHER = "Other"


# Lookup table from a school-type identifier (the enum member's name, e.g.
# "LAW_SCHOOL") to the corresponding SchoolType member. It is derived from the
# enum itself, so it holds exactly one entry per member, in declaration order.
_SCHOOL_TYPE_MAPPING: dict[str, SchoolType] = {member.name: member for member in SchoolType}


class EducationClassification(BaseModel):
    """
    Pydantic model representing the classification result for an education item.

    Instances are created by `EducationClassifier._parse_output` from the
    language model's textual answer.

    Attributes:
        output (SchoolType): The classified school type.
        reasoning (str): Explanation for the classification decision.
    """

    output: SchoolType = Field(description="The classified school type")
    reasoning: str = Field(description="Explanation for the classification")
    # Frozen models reject attribute assignment after construction.
    model_config = {"frozen": True}  # This makes the model immutable and hashable


class EducationClassifier:
    """
    A class for classifying education items from Linkedin profiles.

    This classifier uses a language model to determine the type of educational
    institution and program based on the information provided in a Linkedin profile.

    Attributes:
        _llm_service (LLMService): The language model service used for classification.
        _prompt_template (Any): The template for generating prompts for the language model.
        _prompt_loader (PromptLoader): The loader for prompt templates.
    """

    @staticmethod
    def _parse_output(output: str) -> EducationClassification:
        """
        Parse the output from the language model into an EducationClassification object.

        The expected format is one ``key: value`` pair per line, containing at
        least an ``output`` field (a school-type identifier such as ``MBA`` or
        ``law_school``, matched case-insensitively) and a ``reasoning`` field.
        Blank lines are ignored, and a non-blank line without a colon is treated
        as a continuation of the preceding field's value, so multi-line
        reasoning does not break parsing.

        Args:
            output (str): The raw output string from the language model.

        Returns:
            EducationClassification: A structured representation of the classification result.

        Raises:
            ValueError: If the ``output`` or ``reasoning`` field is missing, or if
                the output contains an unknown school type.
        """
        parsed: dict[str, str] = {}
        current_key: str | None = None
        for line in output.strip().split("\n"):
            key, separator, value = line.partition(":")
            if separator:
                # A colon starts a new field; only the first colon splits, so
                # values may themselves contain colons.
                current_key = key.strip()
                parsed[current_key] = value.strip()
            elif line.strip() and current_key is not None:
                # Colon-less text belongs to the field currently being read.
                parsed[current_key] = f"{parsed[current_key]}\n{line.strip()}"
            # Blank lines, and any text before the first field, are skipped.

        # Report missing fields as ValueError (the documented failure mode)
        # rather than leaking a KeyError from the dictionary lookups below.
        missing = [name for name in ("output", "reasoning") if name not in parsed]
        if missing:
            raise ValueError(f"Missing field(s) in model output: {', '.join(missing)}")

        school_type = _SCHOOL_TYPE_MAPPING.get(parsed["output"].upper())
        if school_type is None:
            raise ValueError(f"Unknown school type: {parsed['output']}")

        return EducationClassification(output=school_type, reasoning=parsed["reasoning"])

    def __init__(
        self, llm_service: LLMService = bindings.open_ai_service, prompt_loader: PromptLoader = bindings.prompt_loader
    ):
        """
        Initialize the EducationClassifier.

        The prompt template is loaded once here and reused for every
        classification request.

        Args:
            llm_service (LLMService, optional): The language model service to use.
                Defaults to the OpenAI service defined in bindings.
            prompt_loader (PromptLoader, optional): The prompt loader to use.
                Defaults to the prompt loader defined in bindings.
        """
        self._llm_service = llm_service
        self._prompt_template = prompt_loader.load_template("education_classifier/1 - education_classifier")
        self._prompt_loader = prompt_loader

    async def classify_education(
        self, linkedin_profile: LinkedinProfile, education: Education
    ) -> EducationClassification:
        """
        Classify a single education item from a Linkedin profile.

        This method builds a prompt from the stored template, passing the whole
        profile formatted as a resume and the education item formatted on its
        own, then awaits the prompt's evaluation and returns its result.

        Args:
            linkedin_profile (LinkedinProfile): The full Linkedin profile of the individual.
            education (Education): The specific education item to classify.

        Returns:
            EducationClassification: The classification result for the education item.

        Raises:
            ValueError: Presumably propagated from `_parse_output` when the model
                output cannot be parsed; this depends on how the prompt applies
                `output_formatter`, which is not visible here (TODO confirm).
        """
        prompt = self._prompt_loader.create_prompt(
            self._prompt_template,
            llm_service=self._llm_service,
            output_formatter=EducationClassifier._parse_output,
            resume=format_profile_as_resume(linkedin_profile),
            education=format_education(education),
        )
        # The return type of evaluate() is not known to the type checker here.
        return await prompt.evaluate()  # type: ignore