navkast committed on
Commit
85c3ddd
·
unverified ·
1 Parent(s): e261f25

Investment focus - asset class, sector - classifiers (#4)

Browse files

* Investment focus asset class classifier

* Investment focus sector classifier

src/notebooks/classifiers/work_experience/investment_focus_asset_class_classifier.ipynb ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "\n",
11
+ "os.getcwd()\n",
12
+ "os.chdir(path=os.getcwd() + \"/../../../\")"
13
+ ]
14
+ },
15
+ {
16
+ "cell_type": "code",
17
+ "execution_count": 2,
18
+ "metadata": {},
19
+ "outputs": [
20
+ {
21
+ "name": "stdout",
22
+ "output_type": "stream",
23
+ "text": [
24
+ "/Volumes/code/geometric/VSP-data-enrichment/src\n"
25
+ ]
26
+ }
27
+ ],
28
+ "source": [
29
+ "import json\n",
30
+ "\n",
31
+ "from vsp.app.model.linkedin.linkedin_models import LinkedinProfile\n",
32
+ "\n",
33
+ "print(os.getcwd())\n",
34
+ "\n",
35
+ "with open(\"notebooks/data/eric_armagost.json\") as f:\n",
36
+ " data = json.load(f)\n",
37
+ "\n",
38
+ "profile = LinkedinProfile.profile_from_json(data)"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 3,
44
+ "metadata": {},
45
+ "outputs": [
46
+ {
47
+ "name": "stdout",
48
+ "output_type": "stream",
49
+ "text": [
50
+ "\u001b[2m2024-09-10 13:03:31\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
51
+ "\u001b[2m2024-09-10 13:03:31\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
52
+ "\u001b[2m2024-09-10 13:03:31\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
53
+ "\u001b[2m2024-09-10 13:03:31\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
54
+ "\u001b[2m2024-09-10 13:03:32\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
55
+ "\u001b[2m2024-09-10 13:03:33\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
56
+ "\u001b[2m2024-09-10 13:03:33\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1062\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m102\u001b[0m\n",
57
+ "\u001b[2m2024-09-10 13:03:33\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
58
+ "\u001b[2m2024-09-10 13:03:33\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1139\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m97\u001b[0m\n",
59
+ "\u001b[2m2024-09-10 13:03:33\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
60
+ "\u001b[2m2024-09-10 13:03:33\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1062\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m116\u001b[0m\n",
61
+ "\u001b[2m2024-09-10 13:03:33\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
62
+ "\u001b[2m2024-09-10 13:03:33\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1061\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m114\u001b[0m\n",
63
+ "{\n",
64
+ " \"investing_focus_asset_class\": \"Other\",\n",
65
+ " \"confidence\": 0.4,\n",
66
+ " \"reasoning\": \"The specific work experience at William Blair & Company is categorized as \\\"Investment Banking,\\\" which primarily focuses on advisory services rather than direct investments in asset classes like venture capital or private equity. The lack of detailed investment-related responsibilities in the description further supports this classification, leading to a lower confidence level.\",\n",
67
+ " \"other_description\": \"Investment banking typically involves advisory services for mergers and acquisitions, capital raising, and financial restructuring, rather than direct investments.\"\n",
68
+ "}\n",
69
+ "{\n",
70
+ " \"investing_focus_asset_class\": \"Private Equity / Buyouts\",\n",
71
+ " \"confidence\": 0.9,\n",
72
+ " \"reasoning\": \"The specific work experience at Accel-KKR indicates a focus on private equity investments, particularly in technology and software sectors. The description highlights typical transactions such as acquisitions, buyouts of divisions from public companies, and take-private transactions, which are all characteristic of private equity buyouts. This aligns well with the classification of PRIVATE_EQUITY_BUYOUTS.\",\n",
73
+ " \"other_description\": null\n",
74
+ "}\n",
75
+ "{\n",
76
+ " \"investing_focus_asset_class\": \"Other\",\n",
77
+ " \"confidence\": 0.4,\n",
78
+ " \"reasoning\": \"The specific work experience at Fidelity Investments as FFAS Corporate Finance does not provide clear evidence of a direct focus on any of the predefined investing categories. The role likely involved corporate finance activities, which may include advisory services rather than direct investment activities in the specified asset classes. Therefore, I classified it as OTHER, with a low confidence level due to the lack of detailed investment-related responsibilities in the description.\",\n",
79
+ " \"other_description\": \"Corporate finance within a financial services context, focusing on financial advisory and capital raising.\"\n",
80
+ "}\n",
81
+ "{\n",
82
+ " \"investing_focus_asset_class\": \"Other\",\n",
83
+ " \"confidence\": 0.4,\n",
84
+ " \"reasoning\": \"The specific work experience at Fidelity Investments as a PI Corporate Finance professional does not provide enough detail to classify it into one of the predefined investing focus or asset class categories. The lack of a description of responsibilities or specific investment activities makes it difficult to ascertain a clear focus. Therefore, I classified it as OTHER, reflecting its general corporate finance nature within the financial services sector.\",\n",
85
+ " \"other_description\": \"Corporate finance role focused on financial services, not directly tied to a specific investing focus or asset class.\"\n",
86
+ "}\n"
87
+ ]
88
+ }
89
+ ],
90
+ "source": [
91
+ "import asyncio\n",
92
+ "\n",
93
+ "from vsp.app.classifiers.work_experience.investing_focus_asset_class_classifier import (\n",
94
+ " InvestingFocusAssetClassClassifier,\n",
95
+ ")\n",
96
+ "\n",
97
+ "investing_focus_asset_class_classifier = InvestingFocusAssetClassClassifier()\n",
98
+ "\n",
99
+ "all_positions_classified = []\n",
100
+ "\n",
101
+ "\n",
102
+ "async def classify_investing_focus_asset_class(profile, position):\n",
103
+ " classification = await investing_focus_asset_class_classifier.classify_investing_focus_asset_class(\n",
104
+ " profile, position\n",
105
+ " )\n",
106
+ " all_positions_classified.append(classification)\n",
107
+ "\n",
108
+ "\n",
109
+ "await asyncio.gather(*[classify_investing_focus_asset_class(profile, position) for position in profile.positions])\n",
110
+ "\n",
111
+ "for classification in all_positions_classified:\n",
112
+ " print(classification.model_dump_json(indent=2))"
113
+ ]
114
+ }
115
+ ],
116
+ "metadata": {
117
+ "kernelspec": {
118
+ "display_name": ".venv",
119
+ "language": "python",
120
+ "name": "python3"
121
+ },
122
+ "language_info": {
123
+ "codemirror_mode": {
124
+ "name": "ipython",
125
+ "version": 3
126
+ },
127
+ "file_extension": ".py",
128
+ "mimetype": "text/x-python",
129
+ "name": "python",
130
+ "nbconvert_exporter": "python",
131
+ "pygments_lexer": "ipython3",
132
+ "version": "3.12.5"
133
+ }
134
+ },
135
+ "nbformat": 4,
136
+ "nbformat_minor": 2
137
+ }
src/notebooks/classifiers/work_experience/investment_focus_sectors_classifier.ipynb ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "\n",
11
+ "os.getcwd()\n",
12
+ "os.chdir(path=os.getcwd() + \"/../../../\")"
13
+ ]
14
+ },
15
+ {
16
+ "cell_type": "code",
17
+ "execution_count": 2,
18
+ "metadata": {},
19
+ "outputs": [
20
+ {
21
+ "name": "stdout",
22
+ "output_type": "stream",
23
+ "text": [
24
+ "/Volumes/code/geometric/VSP-data-enrichment/src\n"
25
+ ]
26
+ }
27
+ ],
28
+ "source": [
29
+ "import json\n",
30
+ "\n",
31
+ "from vsp.app.model.linkedin.linkedin_models import LinkedinProfile\n",
32
+ "\n",
33
+ "print(os.getcwd())\n",
34
+ "\n",
35
+ "with open(\"notebooks/data/eric_armagost.json\") as f:\n",
36
+ " data = json.load(f)\n",
37
+ "\n",
38
+ "profile = LinkedinProfile.profile_from_json(data)"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 4,
44
+ "metadata": {},
45
+ "outputs": [
46
+ {
47
+ "name": "stdout",
48
+ "output_type": "stream",
49
+ "text": [
50
+ "\u001b[2m2024-09-10 14:00:22\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
51
+ "\u001b[2m2024-09-10 14:00:22\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
52
+ "\u001b[2m2024-09-10 14:00:22\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
53
+ "\u001b[2m2024-09-10 14:00:23\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
54
+ "\u001b[2m2024-09-10 14:00:23\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
55
+ "\u001b[2m2024-09-10 14:00:24\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
56
+ "\u001b[2m2024-09-10 14:00:24\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1138\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m92\u001b[0m\n",
57
+ "\u001b[2m2024-09-10 14:00:24\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
58
+ "\u001b[2m2024-09-10 14:00:24\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1061\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m99\u001b[0m\n",
59
+ "\u001b[2m2024-09-10 14:00:24\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
60
+ "\u001b[2m2024-09-10 14:00:24\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1060\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m95\u001b[0m\n",
61
+ "\u001b[2m2024-09-10 14:00:24\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
62
+ "\u001b[2m2024-09-10 14:00:24\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1061\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m115\u001b[0m\n",
63
+ "{\n",
64
+ " \"investing_focus_sector\": \"Technology / Software / TMT\",\n",
65
+ " \"confidence\": 0.9,\n",
66
+ " \"reasoning\": \"The specific work experience at Accel-KKR clearly indicates a focus on investing in software and technology-enabled services companies. The description highlights that the firm is dedicated exclusively to this sector, which aligns with the TECHNOLOGY_SOFTWARE_TMT category. The candidate's role as an Investment Professional further supports this classification, as it involves transactions related to technology companies.\"\n",
67
+ "}\n",
68
+ "{\n",
69
+ " \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
70
+ " \"confidence\": 0.8,\n",
71
+ " \"reasoning\": \"The specific work experience at Fidelity Investments is clearly within the financial services sector, as indicated by the company name and industry classification. The role in FFAS Corporate Finance suggests involvement in financial analysis and corporate finance activities, which aligns with the focus on financial institutions. While the description lacks detail, the context of Fidelity Investments as a major player in financial services supports a strong classification in this sector.\"\n",
72
+ "}\n",
73
+ "{\n",
74
+ " \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
75
+ " \"confidence\": 0.8,\n",
76
+ " \"reasoning\": \"The specific work experience item is from Fidelity Investments, a company clearly identified within the financial services sector. The job title \\\"PI Corporate Finance\\\" suggests a focus on corporate finance activities, which are typically associated with financial institutions. While the description lacks detail, the context of the company and the role strongly indicate a focus on financial services, justifying a high confidence level.\"\n",
77
+ "}\n",
78
+ "{\n",
79
+ " \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
80
+ " \"confidence\": 0.8,\n",
81
+ " \"reasoning\": \"The specific work experience item indicates that the candidate worked in Investment Banking at William Blair & Company, which is categorized under Financial Services. This aligns with the focus on financial institutions, as investment banking involves services related to capital raising, mergers and acquisitions, and financial advisory, all of which are core functions of financial institutions. The absence of a detailed description does not detract significantly from this classification, as the job title and company industry provide strong evidence for the sector classification.\"\n",
82
+ "}\n"
83
+ ]
84
+ }
85
+ ],
86
+ "source": [
87
+ "import asyncio\n",
88
+ "\n",
89
+ "from vsp.app.classifiers.work_experience.investing_focus_sector_classifier import (\n",
90
+ " InvestingFocusSectorClassifier,\n",
91
+ ")\n",
92
+ "\n",
93
+ "investing_focus_sector_classifier = InvestingFocusSectorClassifier()\n",
94
+ "\n",
95
+ "all_positions_classified = []\n",
96
+ "\n",
97
+ "\n",
98
+ "async def classify_investing_focus_sectors(profile, position):\n",
99
+ " classification = await investing_focus_sector_classifier.classify_investing_focus_sector(profile, position)\n",
100
+ " all_positions_classified.append(classification)\n",
101
+ "\n",
102
+ "\n",
103
+ "await asyncio.gather(*[classify_investing_focus_sectors(profile, position) for position in profile.positions])\n",
104
+ "\n",
105
+ "for classification in all_positions_classified:\n",
106
+ " print(classification.model_dump_json(indent=2))"
107
+ ]
108
+ }
109
+ ],
110
+ "metadata": {
111
+ "kernelspec": {
112
+ "display_name": ".venv",
113
+ "language": "python",
114
+ "name": "python3"
115
+ },
116
+ "language_info": {
117
+ "codemirror_mode": {
118
+ "name": "ipython",
119
+ "version": 3
120
+ },
121
+ "file_extension": ".py",
122
+ "mimetype": "text/x-python",
123
+ "name": "python",
124
+ "nbconvert_exporter": "python",
125
+ "pygments_lexer": "ipython3",
126
+ "version": "3.12.5"
127
+ }
128
+ },
129
+ "nbformat": 4,
130
+ "nbformat_minor": 2
131
+ }
src/vsp/app/classifiers/work_experience/investing_focus_asset_class_classifier.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ investing_focus_asset_class_classifier.py
3
+
4
+ This module provides functionality for classifying investing focus and asset classes
5
+ based on work experiences from LinkedIn profiles. It uses a language model to
6
+ determine the specific investing focus or asset class a position belongs to.
7
+
8
+ Classes:
9
+ InvestingFocusAssetClass: Enum representing different investing focus and asset classes.
10
+ InvestingFocusAssetClassClassification: Pydantic model for classification results.
11
+ InvestingFocusAssetClassClassifier: Main class for classifying investing focus and asset classes.
12
+
13
+ Usage:
14
+ classifier = InvestingFocusAssetClassClassifier()
15
+ classification = await classifier.classify_investing_focus_asset_class(linkedin_profile, work_experience)
16
+ """
17
+
18
+ from enum import Enum
19
+ from typing import Any, Final
20
+
21
+ from pydantic import BaseModel, Field
22
+
23
+ from vsp.app import bindings
24
+ from vsp.app.model.linkedin.linkedin_formatters import format_position, format_profile_as_resume
25
+ from vsp.app.model.linkedin.linkedin_models import LinkedinProfile, Position
26
+ from vsp.app.prompts.prompt_loader import PromptLoader
27
+ from vsp.llm.llm_service import LLMService
28
+
29
+
30
class InvestingFocusAssetClass(str, Enum):
    """
    Closed set of investing focus / asset class labels for a work experience.

    Every member is also a ``str``: the member *value* is the human-readable label,
    while the member *name* is the upper-snake-case identifier.
    """

    EARLY_STAGE_VC = "Early-stage VC"
    LATE_STAGE_VC = "Late-stage VC"
    MULTI_STAGE_VC = "Multi-stage VC"
    GROWTH_EQUITY = "Growth Equity"
    PRE_IPO = "Pre-IPO"
    PUBLIC_EQUITIES = "Public Equities"
    REAL_ESTATE = "Real Estate"
    PRIVATE_EQUITY_BUYOUTS = "Private Equity / Buyouts"
    HEDGE_FUND = "Hedge Fund"
    CREDIT = "Credit"
    SECONDARIES = "Secondaries"
    # Catch-all label for experiences that fit none of the categories above.
    OTHER = "Other"
45
+
46
+
47
# Lookup table from enum member name (e.g. "GROWTH_EQUITY") to the enum member itself.
# Built once at import time, in enum definition order.
_INVESTING_FOCUS_ASSET_CLASS_MAPPINGS: Final[dict[str, InvestingFocusAssetClass]] = {
    member.name: member for member in InvestingFocusAssetClass
}
50
+
51
+
52
class InvestingFocusAssetClassClassification(BaseModel):
    """
    Immutable result of classifying one work experience into an investing focus or asset class.

    Attributes:
        investing_focus_asset_class (InvestingFocusAssetClass): The label chosen for the work experience.
        confidence (float): How confident the classification is; validated to lie in [0.0, 1.0].
        reasoning (str): Free-text explanation of why the label was chosen.
        other_description (str | None): Extra description accompanying an 'Other' label; ``None`` when absent.
    """

    # Frozen: instances reject attribute assignment and are hashable.
    model_config = {"frozen": True}

    investing_focus_asset_class: InvestingFocusAssetClass = Field(
        description="The investing focus or asset class",
    )
    confidence: float = Field(
        ge=0.0,
        le=1.0,
        description="Confidence level between 0.0 and 1.0",
    )
    reasoning: str = Field(
        description="Explanation for the classification",
    )
    other_description: str | None = Field(
        default=None,
        description="Description for 'Other' classification",
    )
68
+
69
+
70
class InvestingFocusAssetClassClassifier:
    """
    Classify the investing focus or asset class of a LinkedIn work experience using a language model.

    The classifier renders a prompt from the full profile plus the single position being
    classified, and parses the model's ``key: value`` text reply into an
    ``InvestingFocusAssetClassClassification``.

    Attributes:
        _llm_service (LLMService): The language model service used for classification.
        _prompt_template (Any): The template object returned by ``prompt_loader.load_template``.
        _prompt_loader (PromptLoader): The loader used to load the template and build prompts.
    """

    def __init__(
        self, llm_service: LLMService = bindings.open_ai_service, prompt_loader: PromptLoader = bindings.prompt_loader
    ) -> None:
        """
        Initialize the InvestingFocusAssetClassClassifier.

        Args:
            llm_service (LLMService, optional): The language model service to use.
                Defaults to the OpenAI service defined in bindings.
            prompt_loader (PromptLoader, optional): The prompt loader to use.
                Defaults to the prompt loader defined in bindings.
        """
        self._llm_service = llm_service
        self._prompt_template = prompt_loader.load_template(
            "work_experience_classifier/investing_focus_asset_class/1 - investing_focus_asset_class_classifier"
        )
        self._prompt_loader = prompt_loader

    @staticmethod
    def _parse_fields(output: str) -> dict[str, str]:
        """
        Split raw ``key: value`` model output into a dictionary of stripped strings.

        Blank lines are skipped. A non-blank line without a colon is treated as a
        continuation of the previous field (joined with a single space) so that wrapped,
        multi-line values do not abort parsing; such a line appearing before any field
        is ignored. A line containing a colon always starts a new field.

        Args:
            output (str): The raw output string from the language model.

        Returns:
            dict[str, str]: Field name to field value; later duplicates overwrite earlier ones.
        """
        fields: dict[str, str] = {}
        current_key: str | None = None
        for raw_line in output.strip().split("\n"):
            line = raw_line.strip()
            if not line:
                continue
            key, separator, value = line.partition(":")
            if separator:
                current_key = key.strip()
                fields[current_key] = value.strip()
            elif current_key is not None:
                fields[current_key] = f"{fields[current_key]} {line}".strip()
        return fields

    @staticmethod
    def _parse_output(output: str) -> InvestingFocusAssetClassClassification:
        """
        Parse the output from the language model into an InvestingFocusAssetClassClassification object.

        Args:
            output (str): The raw output string from the language model.

        Returns:
            InvestingFocusAssetClassClassification: A structured representation of the classification result.

        Raises:
            ValueError: If a required field is missing, the investing focus or asset class is
                unknown, or the confidence value is not a valid number.
        """
        parsed = InvestingFocusAssetClassClassifier._parse_fields(output)

        # Fail with the documented ValueError (not a bare KeyError) when the reply is incomplete.
        missing = [name for name in ("investing_focus_asset_class", "confidence", "reasoning") if name not in parsed]
        if missing:
            raise ValueError(f"Missing required field(s) in model output: {', '.join(missing)}")

        # The mapping is keyed by enum member name, so normalise case before the lookup.
        investing_focus_asset_class_str = parsed["investing_focus_asset_class"].upper()
        try:
            investing_focus_asset_class = _INVESTING_FOCUS_ASSET_CLASS_MAPPINGS[investing_focus_asset_class_str]
        except KeyError as e:
            raise ValueError(f"Unknown investing focus or asset class: {str(e)}") from e

        try:
            confidence = float(parsed["confidence"])
        except ValueError as e:
            raise ValueError(f"Invalid confidence value: {parsed['confidence']}") from e

        # ``other_description`` is only carried over for the OTHER label; otherwise it stays None.
        other_description = (
            parsed.get("other_description") if investing_focus_asset_class == InvestingFocusAssetClass.OTHER else None
        )

        return InvestingFocusAssetClassClassification(
            investing_focus_asset_class=investing_focus_asset_class,
            confidence=confidence,
            reasoning=parsed["reasoning"],
            other_description=other_description,
        )

    async def classify_investing_focus_asset_class(
        self, linkedin_profile: LinkedinProfile, work_experience: Position
    ) -> InvestingFocusAssetClassClassification:
        """
        Classify a single work experience item from a LinkedIn profile into an investing focus or asset class.

        Builds a prompt from the formatted profile and the formatted position, registers
        ``_parse_output`` as the output formatter, and awaits the prompt's evaluation.

        Args:
            linkedin_profile (LinkedinProfile): The full LinkedIn profile of the individual.
            work_experience (Position): The specific work experience item to classify.

        Returns:
            InvestingFocusAssetClassClassification: The classification result for the work experience item.

        Raises:
            ValueError: If the model output cannot be parsed (presumably propagated from
                ``_parse_output`` via ``prompt.evaluate()`` - TODO confirm against the prompt implementation).
        """
        prompt = self._prompt_loader.create_prompt(
            self._prompt_template,
            llm_service=self._llm_service,
            output_formatter=self._parse_output,
            resume=format_profile_as_resume(linkedin_profile),
            work_experience=format_position(work_experience),
        )
        return await prompt.evaluate()  # type: ignore
src/vsp/app/classifiers/work_experience/investing_focus_sector_classifier.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ investing_focus_sector_classifier.py
3
+
4
+ This module provides functionality for classifying investing focus sectors
5
+ based on work experiences from LinkedIn profiles. It uses a language model to
6
+ determine the specific sector a position focuses on in terms of investments.
7
+
8
+ Classes:
9
+ InvestingFocusSector: Enum representing different investing focus sectors.
10
+ InvestingFocusSectorClassification: Pydantic model for classification results.
11
+ InvestingFocusSectorClassifier: Main class for classifying investing focus sectors.
12
+
13
+ Usage:
14
+ classifier = InvestingFocusSectorClassifier()
15
+ classification = await classifier.classify_investing_focus_sector(linkedin_profile, work_experience)
16
+ """
17
+
18
+ from enum import Enum
19
+ from typing import Any, Final
20
+
21
+ from pydantic import BaseModel, Field
22
+
23
+ from vsp.app import bindings
24
+ from vsp.app.model.linkedin.linkedin_formatters import format_position, format_profile_as_resume
25
+ from vsp.app.model.linkedin.linkedin_models import LinkedinProfile, Position
26
+ from vsp.app.prompts.prompt_loader import PromptLoader
27
+ from vsp.llm.llm_service import LLMService
28
+
29
+
30
class InvestingFocusSector(str, Enum):
    """
    Closed set of sector labels describing what a work experience invests in.

    Every member is also a ``str``: the member *value* is the human-readable label,
    while the member *name* is the upper-snake-case identifier.
    """

    GENERALIST = "Generalist"
    HEALTHCARE = "Healthcare"
    INDUSTRIALS = "Industrials"
    BUSINESS_SERVICES = "Business Services"
    CONSUMER_RETAIL = "Consumer & Retail"
    ENERGY_NATURAL_RESOURCES = "Energy / Natural Resources / Cleantech / Utilities"
    REAL_ESTATE_GAMING_LODGING = "Real Estate, Gaming & Lodging"
    TECHNOLOGY_SOFTWARE_TMT = "Technology / Software / TMT"
    MEDIA_ENTERTAINMENT = "Media & Entertainment"
    FINANCIAL_INSTITUTIONS = "Financial Institutions Group (FIG) / Fintech"
    INFRASTRUCTURE_TRANSPORTATION = "Infrastructure / Transportation"
    # Catch-all label for experiences that fit none of the sectors above.
    OTHER = "Other"
45
+
46
+
47
# Lookup table from enum member name (e.g. "HEALTHCARE") to the enum member itself.
# Built once at import time, in enum definition order.
_INVESTING_FOCUS_SECTOR_MAPPINGS: Final[dict[str, InvestingFocusSector]] = {
    member.name: member for member in InvestingFocusSector
}
50
+
51
+
52
class InvestingFocusSectorClassification(BaseModel):
    """
    Immutable result of classifying one work experience into an investing focus sector.

    Attributes:
        investing_focus_sector (InvestingFocusSector): The sector label chosen for the work experience.
        confidence (float): How confident the classification is; validated to lie in [0.0, 1.0].
        reasoning (str): Free-text explanation of why the label was chosen.
    """

    # Frozen: instances reject attribute assignment and are hashable.
    model_config = {"frozen": True}

    investing_focus_sector: InvestingFocusSector = Field(
        description="The investing focus sector",
    )
    confidence: float = Field(
        ge=0.0,
        le=1.0,
        description="Confidence level between 0.0 and 1.0",
    )
    reasoning: str = Field(
        description="Explanation for the classification",
    )
66
+
67
+
68
class InvestingFocusSectorClassifier:
    """
    Classify the investing focus sector of a LinkedIn work experience using a language model.

    The classifier renders a prompt from the full profile plus the single position being
    classified, and parses the model's ``key: value`` text reply into an
    ``InvestingFocusSectorClassification``.

    Attributes:
        _llm_service (LLMService): The language model service used for classification.
        _prompt_template (Any): The template object returned by ``prompt_loader.load_template``.
        _prompt_loader (PromptLoader): The loader used to load the template and build prompts.
    """

    def __init__(
        self, llm_service: LLMService = bindings.open_ai_service, prompt_loader: PromptLoader = bindings.prompt_loader
    ) -> None:
        """
        Initialize the InvestingFocusSectorClassifier.

        Args:
            llm_service (LLMService, optional): The language model service to use.
                Defaults to the OpenAI service defined in bindings.
            prompt_loader (PromptLoader, optional): The prompt loader to use.
                Defaults to the prompt loader defined in bindings.
        """
        self._llm_service = llm_service
        self._prompt_template = prompt_loader.load_template(
            "work_experience_classifier/investing_focus_sector/1 - investing_focus_sector_classifier"
        )
        self._prompt_loader = prompt_loader

    @staticmethod
    def _parse_fields(output: str) -> dict[str, str]:
        """
        Split raw ``key: value`` model output into a dictionary of stripped strings.

        Blank lines are skipped. A non-blank line without a colon is treated as a
        continuation of the previous field (joined with a single space) so that wrapped,
        multi-line values do not abort parsing; such a line appearing before any field
        is ignored. A line containing a colon always starts a new field.

        Args:
            output (str): The raw output string from the language model.

        Returns:
            dict[str, str]: Field name to field value; later duplicates overwrite earlier ones.
        """
        fields: dict[str, str] = {}
        current_key: str | None = None
        for raw_line in output.strip().split("\n"):
            line = raw_line.strip()
            if not line:
                continue
            key, separator, value = line.partition(":")
            if separator:
                current_key = key.strip()
                fields[current_key] = value.strip()
            elif current_key is not None:
                fields[current_key] = f"{fields[current_key]} {line}".strip()
        return fields

    @staticmethod
    def _parse_output(output: str) -> InvestingFocusSectorClassification:
        """
        Parse the output from the language model into an InvestingFocusSectorClassification object.

        Args:
            output (str): The raw output string from the language model.

        Returns:
            InvestingFocusSectorClassification: A structured representation of the classification result.

        Raises:
            ValueError: If a required field is missing, the investing focus sector is unknown,
                or the confidence value is not a valid number.
        """
        parsed = InvestingFocusSectorClassifier._parse_fields(output)

        # Fail with the documented ValueError (not a bare KeyError) when the reply is incomplete.
        missing = [name for name in ("investing_focus_sector", "confidence", "reasoning") if name not in parsed]
        if missing:
            raise ValueError(f"Missing required field(s) in model output: {', '.join(missing)}")

        # The mapping is keyed by enum member name, so normalise case before the lookup.
        investing_focus_sector_str = parsed["investing_focus_sector"].upper()
        try:
            investing_focus_sector = _INVESTING_FOCUS_SECTOR_MAPPINGS[investing_focus_sector_str]
        except KeyError as e:
            raise ValueError(f"Unknown investing focus sector: {str(e)}") from e

        try:
            confidence = float(parsed["confidence"])
        except ValueError as e:
            raise ValueError(f"Invalid confidence value: {parsed['confidence']}") from e

        return InvestingFocusSectorClassification(
            investing_focus_sector=investing_focus_sector,
            confidence=confidence,
            reasoning=parsed["reasoning"],
        )

    async def classify_investing_focus_sector(
        self, linkedin_profile: LinkedinProfile, work_experience: Position
    ) -> InvestingFocusSectorClassification:
        """
        Classify a single work experience item from a LinkedIn profile into an investing focus sector.

        Builds a prompt from the formatted profile and the formatted position, registers
        ``_parse_output`` as the output formatter, and awaits the prompt's evaluation.

        Args:
            linkedin_profile (LinkedinProfile): The full LinkedIn profile of the individual.
            work_experience (Position): The specific work experience item to classify.

        Returns:
            InvestingFocusSectorClassification: The classification result for the work experience item.

        Raises:
            ValueError: If the model output cannot be parsed (presumably propagated from
                ``_parse_output`` via ``prompt.evaluate()`` - TODO confirm against the prompt implementation).
        """
        prompt = self._prompt_loader.create_prompt(
            self._prompt_template,
            llm_service=self._llm_service,
            output_formatter=self._parse_output,
            resume=format_profile_as_resume(linkedin_profile),
            work_experience=format_position(work_experience),
        )
        return await prompt.evaluate()  # type: ignore
src/vsp/app/main.py CHANGED
@@ -40,6 +40,14 @@ from vsp.app.classifiers.work_experience.general_work_experience_classifier impo
40
  WorkExperienceClassification,
41
  WorkExperienceClassifier,
42
  )
 
 
 
 
 
 
 
 
43
  from vsp.app.classifiers.work_experience.investment_banking_group_classifier import (
44
  InvestmentBankingGroupClassification,
45
  InvestmentBankingGroupClassifier,
@@ -69,11 +77,17 @@ class ClassifiedWorkExperience(BaseModel):
69
  work_experience_classification (WorkExperienceClassification): The general work experience classification.
70
  investment_banking_classification (InvestmentBankingGroupClassification | None):
71
  The investment banking group classification, if applicable.
 
 
 
 
72
  """
73
 
74
  position: Position
75
  work_experience_classification: WorkExperienceClassification
76
  investment_banking_classification: InvestmentBankingGroupClassification | None = None
 
 
77
 
78
 
79
  class LinkedinProfileClassificationResults(BaseModel):
@@ -98,6 +112,8 @@ async def process_linkedin_profile(profile: LinkedinProfile) -> LinkedinProfileC
98
  2. Classifies all work experience items in the profile.
99
  3. For work experiences classified as investment banking, performs an additional
100
  investment banking group classification.
 
 
101
 
102
  Args:
103
  profile (LinkedinProfile): The LinkedIn profile to process.
@@ -108,6 +124,8 @@ async def process_linkedin_profile(profile: LinkedinProfile) -> LinkedinProfileC
108
  education_classifier = EducationClassifier()
109
  work_experience_classifier = WorkExperienceClassifier()
110
  investment_banking_classifier = InvestmentBankingGroupClassifier()
 
 
111
 
112
  # Classify educations
113
  education_tasks = [education_classifier.classify_education(profile, education) for education in profile.educations]
@@ -119,7 +137,7 @@ async def process_linkedin_profile(profile: LinkedinProfile) -> LinkedinProfileC
119
  ]
120
  work_experience_classifications = await asyncio.gather(*work_experience_tasks)
121
 
122
- # Classify investment banking groups for relevant positions
123
  classified_work_experiences = []
124
  for position, work_classification in zip(profile.positions, work_experience_classifications):
125
  classified_work_experience = ClassifiedWorkExperience(
@@ -130,6 +148,17 @@ async def process_linkedin_profile(profile: LinkedinProfile) -> LinkedinProfileC
130
  ib_classification = await investment_banking_classifier.classify_investment_banking_group(profile, position)
131
  classified_work_experience.investment_banking_classification = ib_classification
132
 
 
 
 
 
 
 
 
 
 
 
 
133
  classified_work_experiences.append(classified_work_experience)
134
 
135
  # Prepare the results using Pydantic models
 
40
  WorkExperienceClassification,
41
  WorkExperienceClassifier,
42
  )
43
+ from vsp.app.classifiers.work_experience.investing_focus_asset_class_classifier import (
44
+ InvestingFocusAssetClassClassification,
45
+ InvestingFocusAssetClassClassifier,
46
+ )
47
+ from vsp.app.classifiers.work_experience.investing_focus_sector_classifier import (
48
+ InvestingFocusSectorClassification,
49
+ InvestingFocusSectorClassifier,
50
+ )
51
  from vsp.app.classifiers.work_experience.investment_banking_group_classifier import (
52
  InvestmentBankingGroupClassification,
53
  InvestmentBankingGroupClassifier,
 
77
  work_experience_classification (WorkExperienceClassification): The general work experience classification.
78
  investment_banking_classification (InvestmentBankingGroupClassification | None):
79
  The investment banking group classification, if applicable.
80
+ investing_focus_asset_class_classification (InvestingFocusAssetClassClassification | None):
81
+ The investing focus or asset class classification, if applicable.
82
+ investing_focus_sector_classification (InvestingFocusSectorClassification | None):
83
+ The investing focus sector classification, if applicable.
84
  """
85
 
86
  position: Position
87
  work_experience_classification: WorkExperienceClassification
88
  investment_banking_classification: InvestmentBankingGroupClassification | None = None
89
+ investing_focus_asset_class_classification: InvestingFocusAssetClassClassification | None = None
90
+ investing_focus_sector_classification: InvestingFocusSectorClassification | None = None
91
 
92
 
93
  class LinkedinProfileClassificationResults(BaseModel):
 
112
  2. Classifies all work experience items in the profile.
113
  3. For work experiences classified as investment banking, performs an additional
114
  investment banking group classification.
115
+ 4. For work experiences classified as investing, performs additional
116
+ investing focus asset class and investing focus sector classifications.
117
 
118
  Args:
119
  profile (LinkedinProfile): The LinkedIn profile to process.
 
124
  education_classifier = EducationClassifier()
125
  work_experience_classifier = WorkExperienceClassifier()
126
  investment_banking_classifier = InvestmentBankingGroupClassifier()
127
+ investing_focus_asset_class_classifier = InvestingFocusAssetClassClassifier()
128
+ investing_focus_sector_classifier = InvestingFocusSectorClassifier()
129
 
130
  # Classify educations
131
  education_tasks = [education_classifier.classify_education(profile, education) for education in profile.educations]
 
137
  ]
138
  work_experience_classifications = await asyncio.gather(*work_experience_tasks)
139
 
140
+ # Classify investment banking groups and investing focus/asset classes for relevant positions
141
  classified_work_experiences = []
142
  for position, work_classification in zip(profile.positions, work_experience_classifications):
143
  classified_work_experience = ClassifiedWorkExperience(
 
148
  ib_classification = await investment_banking_classifier.classify_investment_banking_group(profile, position)
149
  classified_work_experience.investment_banking_classification = ib_classification
150
 
151
+ if work_classification.secondary_job_type == SecondaryJobType.INVESTING:
152
+ investing_asset_class_classification = (
153
+ await investing_focus_asset_class_classifier.classify_investing_focus_asset_class(profile, position)
154
+ )
155
+ classified_work_experience.investing_focus_asset_class_classification = investing_asset_class_classification
156
+
157
+ investing_sector_classification = await investing_focus_sector_classifier.classify_investing_focus_sector(
158
+ profile, position
159
+ )
160
+ classified_work_experience.investing_focus_sector_classification = investing_sector_classification
161
+
162
  classified_work_experiences.append(classified_work_experience)
163
 
164
  # Prepare the results using Pydantic models
src/vsp/app/prompts/work_experience_classifier/investing_focus_asset_class/1 - investing_focus_asset_class_classifier_human.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Please classify the following work experience item based on the job candidate's full resume and the specific work experience information provided from their LinkedIn profile. Analyze both sources of information carefully to determine the most accurate classification for the investing focus or asset class.
2
+
3
+ Full Resume:
4
+ {resume}
5
+
6
+ Specific Work Experience Item:
7
+ {work_experience}
8
+
9
+ Provide your classification for the investing focus or asset class OF THIS SPECIFIC WORK EXPERIENCE, along with your confidence level (0.0 to 1.0) and reasoning in the specified format. Ensure your reasoning refers to specific details from this specific work experience item that support your decision. If you select OTHER, provide a brief description of the focus or asset class in the other_description field.
10
+
11
+ NOTE: The candidate may have changed jobs, and therefore, investing focus. Do not rely on the most recent job, or the most detailed job description, in making your classification. If there is no supporting evidence to prove your classification for THIS specific work experience on a standalone basis, LOWER your confidence level TO BELOW 0.5.
12
+
src/vsp/app/prompts/work_experience_classifier/investing_focus_asset_class/1 - investing_focus_asset_class_classifier_system.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are an expert in analyzing professional work experiences and classifying them according to specific investing focus and asset classes. Your task is to examine a job candidate's full resume and a specific work experience item from their LinkedIn profile, then classify the work experience into one of the predefined investing focus or asset class categories.
2
+
3
+ Use the provided information carefully to make an accurate classification. Pay close attention to job titles, responsibilities, company descriptions, and any other relevant information provided in both the resume and the specific work experience item.
4
+
5
+ Investing Focus / Asset Class categories:
6
+ 1. EARLY_STAGE_VC: Early-stage venture capital investments
7
+ 2. LATE_STAGE_VC: Late-stage venture capital investments
8
+ 3. MULTI_STAGE_VC: Venture capital investments across multiple stages
9
+ 4. GROWTH_EQUITY: Growth equity investments
10
+ 5. PRE_IPO: Pre-IPO investments
11
+ 6. PUBLIC_EQUITIES: Investments in publicly traded equities
12
+ 7. REAL_ESTATE: Real estate investments
13
+ 8. PRIVATE_EQUITY_BUYOUTS: Private equity and buyout investments
14
+ 9. HEDGE_FUND: Hedge fund investments
15
+ 10. CREDIT: Credit investments
16
+ 11. SECONDARIES: Secondary market investments
17
+ 12. OTHER: Any focus or asset class that doesn't fit the above categories
18
+
19
+ Provide your response in the following format exactly:
20
+
21
+ investing_focus_asset_class: [ONE_OF_THE_ABOVE_CATEGORIES]
22
+ other_description: [Only if OTHER is selected, provide a brief description]
23
+ confidence: [0.0 to 1.0]
24
+ reasoning: [Your explanation here]
25
+
26
+ Ensure each part of your response is on a separate line, exactly as shown above. There should be only three or four lines (depending on whether OTHER is selected).
27
+
28
+ The investing_focus_asset_class must be one of: EARLY_STAGE_VC, LATE_STAGE_VC, MULTI_STAGE_VC, GROWTH_EQUITY, PRE_IPO, PUBLIC_EQUITIES, REAL_ESTATE, PRIVATE_EQUITY_BUYOUTS, HEDGE_FUND, CREDIT, SECONDARIES, OTHER.
29
+
30
+ Your confidence level should reflect how certain you are about your classification based on the information provided.
31
+
32
+ In your reasoning, briefly explain why you chose this classification, referencing specific details from the resume and work experience item that support your decision. If you select OTHER, provide a brief description of the focus or asset class in the other_description field.
src/vsp/app/prompts/work_experience_classifier/investing_focus_sector/1 - investing_focus_sector_classifier_human.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Please classify the following work experience item based on the job candidate's full resume and the specific work experience information provided from their LinkedIn profile. Analyze both sources of information carefully to determine the most accurate classification for the investing focus sector.
2
+
3
+ Full Resume:
4
+ {resume}
5
+
6
+ Specific Work Experience Item:
7
+ {work_experience}
8
+
9
+ Provide your classification for the investing focus sector OF THIS SPECIFIC WORK EXPERIENCE, along with your confidence level (0.0 to 1.0) and reasoning in the specified format. Ensure your reasoning refers to specific details from this specific work experience item that support your decision.
10
+
11
+ NOTE: The candidate may have changed jobs, and therefore, investing focus sectors. Do not rely on the most recent job, or the most detailed job description, in making your classification. If there is no supporting evidence to prove your classification for THIS specific work experience on a standalone basis, LOWER your confidence level TO BELOW 0.5.
src/vsp/app/prompts/work_experience_classifier/investing_focus_sector/1 - investing_focus_sector_classifier_system.txt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are an expert in analyzing professional work experiences and classifying them according to specific investing focus sectors. Your task is to examine a job candidate's full resume and a specific work experience item from their LinkedIn profile, then classify the work experience into one of the predefined investing focus sector categories.
2
+
3
+ Use the provided information carefully to make an accurate classification. Pay close attention to job titles, responsibilities, company descriptions, and any other relevant information provided in both the resume and the specific work experience item.
4
+
5
+ Investing Focus Sector categories:
6
+ 1. GENERALIST: A broad focus across multiple sectors
7
+ 2. HEALTHCARE: Focus on healthcare, biotech, pharmaceuticals, and medical devices
8
+ 3. INDUSTRIALS: Focus on manufacturing, aerospace, defense, and other industrial sectors
9
+ 4. BUSINESS_SERVICES: Focus on professional services, consulting, and B2B companies
10
+ 5. CONSUMER_RETAIL: Focus on consumer goods, retail, and e-commerce
11
+ 6. ENERGY_NATURAL_RESOURCES: Focus on energy, natural resources, cleantech, and utilities
12
+ 7. REAL_ESTATE_GAMING_LODGING: Focus on real estate, gaming, casinos, and hospitality
13
+ 8. TECHNOLOGY_SOFTWARE_TMT: Focus on technology, software, and telecom
14
+ 9. MEDIA_ENTERTAINMENT: Focus on media, entertainment, and content creation
15
+ 10. FINANCIAL_INSTITUTIONS: Focus on banks, insurance, fintech, and other financial services
16
+ 11. INFRASTRUCTURE_TRANSPORTATION: Focus on infrastructure projects and transportation
17
+ 12. OTHER: Any focus that doesn't fit the above categories
18
+
19
+ Provide your response in the following format exactly:
20
+
21
+ investing_focus_sector: [ONE_OF_THE_ABOVE_CATEGORIES]
22
+ confidence: [0.0 to 1.0]
23
+ reasoning: [Your explanation here]
24
+
25
+ Ensure each part of your response is on a separate line, exactly as shown above. There should be exactly three lines.
26
+
27
+ The investing_focus_sector must be one of: GENERALIST, HEALTHCARE, INDUSTRIALS, BUSINESS_SERVICES, CONSUMER_RETAIL, ENERGY_NATURAL_RESOURCES, REAL_ESTATE_GAMING_LODGING, TECHNOLOGY_SOFTWARE_TMT, MEDIA_ENTERTAINMENT, FINANCIAL_INSTITUTIONS, INFRASTRUCTURE_TRANSPORTATION, OTHER.
28
+
29
+ Your confidence level should reflect how certain you are about your classification based on the information provided.
30
+
31
+ In your reasoning, briefly explain why you chose this classification, referencing specific details from the resume and work experience item that support your decision.
tests/vsp/app/test_main.py CHANGED
@@ -8,6 +8,14 @@ from vsp.app.classifiers.work_experience.general_work_experience_classifier impo
8
  SecondaryJobType,
9
  WorkExperienceClassification,
10
  )
 
 
 
 
 
 
 
 
11
  from vsp.app.classifiers.work_experience.investment_banking_group_classifier import (
12
  InvestmentBankingGroup,
13
  InvestmentBankingGroupClassification,
@@ -33,6 +41,10 @@ def sample_linkedin_profile():
33
  title="Investment Banking Analyst",
34
  company_name="Bank Corp",
35
  ),
 
 
 
 
36
  Position(
37
  title="Software Engineer",
38
  company_name="Tech Corp",
@@ -42,53 +54,123 @@ def sample_linkedin_profile():
42
 
43
 
44
  @pytest.mark.asyncio
45
- async def test_process_linkedin_profile_investment_banking(sample_linkedin_profile):
46
  with (
47
  patch("vsp.app.main.EducationClassifier") as mock_education_classifier,
48
  patch("vsp.app.main.WorkExperienceClassifier") as mock_work_experience_classifier,
49
  patch("vsp.app.main.InvestmentBankingGroupClassifier") as mock_investment_banking_group_classifier,
 
 
50
  ):
51
  mock_education_classifier.return_value.classify_education = AsyncMock(
52
  return_value=EducationClassification(output=SchoolType.MBA, confidence=1.0, reasoning="Test")
53
  )
54
  mock_work_experience_classifier.return_value.classify_work_experience = AsyncMock(
55
- return_value=WorkExperienceClassification(
56
- primary_job_type=PrimaryJobType.FULL_TIME,
57
- secondary_job_type=SecondaryJobType.INVESTMENT_BANKING,
58
- confidence=1.0,
59
- reasoning="Test",
60
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  )
62
  mock_investment_banking_group_classifier.return_value.classify_investment_banking_group = AsyncMock(
63
  return_value=InvestmentBankingGroupClassification(
64
  investment_banking_group=InvestmentBankingGroup.M_AND_A, confidence=1.0, reasoning="Test"
65
  )
66
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  result = await process_linkedin_profile(sample_linkedin_profile)
69
 
70
  assert isinstance(result, LinkedinProfileClassificationResults)
71
  assert len(result.classified_educations) == 1
72
- assert len(result.classified_work_experiences) == 2
73
 
74
- # Check that the investment banking position has an investment banking group classification
75
  ib_experience = result.classified_work_experiences[0]
76
  assert ib_experience.work_experience_classification.secondary_job_type == SecondaryJobType.INVESTMENT_BANKING
77
  assert ib_experience.investment_banking_classification is not None
78
  assert (
79
  ib_experience.investment_banking_classification.investment_banking_group == InvestmentBankingGroup.M_AND_A
80
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
- # Check that the investment banking group classifier was called
83
- mock_investment_banking_group_classifier.return_value.classify_investment_banking_group.assert_called()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
 
86
  @pytest.mark.asyncio
87
- async def test_process_linkedin_profile_non_investment_banking(sample_linkedin_profile):
 
 
 
 
 
 
 
88
  with (
89
  patch("vsp.app.main.EducationClassifier") as mock_education_classifier,
90
  patch("vsp.app.main.WorkExperienceClassifier") as mock_work_experience_classifier,
91
  patch("vsp.app.main.InvestmentBankingGroupClassifier") as mock_investment_banking_group_classifier,
 
 
92
  ):
93
  mock_education_classifier.return_value.classify_education = AsyncMock(
94
  return_value=EducationClassification(output=SchoolType.MBA, confidence=1.0, reasoning="Test")
@@ -101,21 +183,23 @@ async def test_process_linkedin_profile_non_investment_banking(sample_linkedin_p
101
  reasoning="Test",
102
  )
103
  )
104
- mock_investment_banking_group_classifier.return_value.classify_investment_banking_group = AsyncMock()
105
 
106
  result = await process_linkedin_profile(sample_linkedin_profile)
107
 
108
  assert isinstance(result, LinkedinProfileClassificationResults)
109
  assert len(result.classified_educations) == 1
110
- assert len(result.classified_work_experiences) == 2
111
 
112
- # Check that none of the work experiences have an investment banking group classification
113
- for experience in result.classified_work_experiences:
114
- assert experience.work_experience_classification.secondary_job_type == SecondaryJobType.ENGINEERING
115
- assert experience.investment_banking_classification is None
 
116
 
117
- # Check that the investment banking group classifier was not called
118
  mock_investment_banking_group_classifier.return_value.classify_investment_banking_group.assert_not_called()
 
 
119
 
120
 
121
  if __name__ == "__main__":
 
8
  SecondaryJobType,
9
  WorkExperienceClassification,
10
  )
11
+ from vsp.app.classifiers.work_experience.investing_focus_asset_class_classifier import (
12
+ InvestingFocusAssetClass,
13
+ InvestingFocusAssetClassClassification,
14
+ )
15
+ from vsp.app.classifiers.work_experience.investing_focus_sector_classifier import (
16
+ InvestingFocusSector,
17
+ InvestingFocusSectorClassification,
18
+ )
19
  from vsp.app.classifiers.work_experience.investment_banking_group_classifier import (
20
  InvestmentBankingGroup,
21
  InvestmentBankingGroupClassification,
 
41
  title="Investment Banking Analyst",
42
  company_name="Bank Corp",
43
  ),
44
+ Position(
45
+ title="Investment Associate",
46
+ company_name="VC Firm",
47
+ ),
48
  Position(
49
  title="Software Engineer",
50
  company_name="Tech Corp",
 
54
 
55
 
56
@pytest.mark.asyncio
async def test_process_linkedin_profile_comprehensive(sample_linkedin_profile):
    """Check that each follow-up classifier runs only for its matching job type.

    Every classifier that ``process_linkedin_profile`` instantiates is patched
    with canned results. The general work-experience mock yields
    INVESTMENT_BANKING, INVESTING and ENGINEERING, in that order, so the test
    can verify which positions receive the investment-banking group and the
    investing focus (asset class / sector) classifications.

    NOTE(review): assumes the ``sample_linkedin_profile`` fixture supplies one
    education and three positions; the count assertions below depend on it.
    """
    # One canned general classification per position, in position order.
    secondary_job_types = (
        SecondaryJobType.INVESTMENT_BANKING,
        SecondaryJobType.INVESTING,
        SecondaryJobType.ENGINEERING,
    )
    general_classifications = [
        WorkExperienceClassification(
            primary_job_type=PrimaryJobType.FULL_TIME,
            secondary_job_type=job_type,
            confidence=1.0,
            reasoning="Test",
        )
        for job_type in secondary_job_types
    ]

    with (
        patch("vsp.app.main.EducationClassifier") as education_cls,
        patch("vsp.app.main.WorkExperienceClassifier") as work_experience_cls,
        patch("vsp.app.main.InvestmentBankingGroupClassifier") as ib_group_cls,
        patch("vsp.app.main.InvestingFocusAssetClassClassifier") as asset_class_cls,
        patch("vsp.app.main.InvestingFocusSectorClassifier") as sector_cls,
    ):
        # The instances the code under test obtains by calling the patched classes.
        education_clf = education_cls.return_value
        work_experience_clf = work_experience_cls.return_value
        ib_group_clf = ib_group_cls.return_value
        asset_class_clf = asset_class_cls.return_value
        sector_clf = sector_cls.return_value

        education_clf.classify_education = AsyncMock(
            return_value=EducationClassification(output=SchoolType.MBA, confidence=1.0, reasoning="Test")
        )
        work_experience_clf.classify_work_experience = AsyncMock(side_effect=general_classifications)
        ib_group_clf.classify_investment_banking_group = AsyncMock(
            return_value=InvestmentBankingGroupClassification(
                investment_banking_group=InvestmentBankingGroup.M_AND_A, confidence=1.0, reasoning="Test"
            )
        )
        asset_class_clf.classify_investing_focus_asset_class = AsyncMock(
            return_value=InvestingFocusAssetClassClassification(
                investing_focus_asset_class=InvestingFocusAssetClass.EARLY_STAGE_VC,
                confidence=1.0,
                reasoning="Test",
            )
        )
        sector_clf.classify_investing_focus_sector = AsyncMock(
            return_value=InvestingFocusSectorClassification(
                investing_focus_sector=InvestingFocusSector.TECHNOLOGY_SOFTWARE_TMT,
                confidence=1.0,
                reasoning="Test",
            )
        )

        result = await process_linkedin_profile(sample_linkedin_profile)

        assert isinstance(result, LinkedinProfileClassificationResults)
        assert len(result.classified_educations) == 1
        assert len(result.classified_work_experiences) == 3

        ib_experience, investing_experience, eng_experience = result.classified_work_experiences

        # Investment banking position: only the IB group classification is filled in.
        assert ib_experience.work_experience_classification.secondary_job_type == SecondaryJobType.INVESTMENT_BANKING
        ib_group_result = ib_experience.investment_banking_classification
        assert ib_group_result is not None
        assert ib_group_result.investment_banking_group == InvestmentBankingGroup.M_AND_A
        assert ib_experience.investing_focus_asset_class_classification is None
        assert ib_experience.investing_focus_sector_classification is None

        # Investing position: asset class and sector are filled in, IB group is not.
        assert investing_experience.work_experience_classification.secondary_job_type == SecondaryJobType.INVESTING
        assert investing_experience.investment_banking_classification is None
        asset_class_result = investing_experience.investing_focus_asset_class_classification
        assert asset_class_result is not None
        assert asset_class_result.investing_focus_asset_class == InvestingFocusAssetClass.EARLY_STAGE_VC
        sector_result = investing_experience.investing_focus_sector_classification
        assert sector_result is not None
        assert sector_result.investing_focus_sector == InvestingFocusSector.TECHNOLOGY_SOFTWARE_TMT

        # Engineering position: no follow-up classification at all.
        assert eng_experience.work_experience_classification.secondary_job_type == SecondaryJobType.ENGINEERING
        assert eng_experience.investment_banking_classification is None
        assert eng_experience.investing_focus_asset_class_classification is None
        assert eng_experience.investing_focus_sector_classification is None

        # Each classifier must have been invoked exactly as often as expected.
        assert education_clf.classify_education.call_count == 1
        assert work_experience_clf.classify_work_experience.call_count == 3
        assert ib_group_clf.classify_investment_banking_group.call_count == 1
        assert asset_class_clf.classify_investing_focus_asset_class.call_count == 1
        assert sector_clf.classify_investing_focus_sector.call_count == 1
157
 
158
 
159
  @pytest.mark.asyncio
160
+ async def test_process_linkedin_profile_no_investing(sample_linkedin_profile):
161
+ sample_linkedin_profile.positions = [
162
+ Position(
163
+ title="Software Engineer",
164
+ company_name="Tech Corp",
165
+ )
166
+ ]
167
+
168
  with (
169
  patch("vsp.app.main.EducationClassifier") as mock_education_classifier,
170
  patch("vsp.app.main.WorkExperienceClassifier") as mock_work_experience_classifier,
171
  patch("vsp.app.main.InvestmentBankingGroupClassifier") as mock_investment_banking_group_classifier,
172
+ patch("vsp.app.main.InvestingFocusAssetClassClassifier") as mock_investing_focus_asset_class_classifier,
173
+ patch("vsp.app.main.InvestingFocusSectorClassifier") as mock_investing_focus_sector_classifier,
174
  ):
175
  mock_education_classifier.return_value.classify_education = AsyncMock(
176
  return_value=EducationClassification(output=SchoolType.MBA, confidence=1.0, reasoning="Test")
 
183
  reasoning="Test",
184
  )
185
  )
 
186
 
187
  result = await process_linkedin_profile(sample_linkedin_profile)
188
 
189
  assert isinstance(result, LinkedinProfileClassificationResults)
190
  assert len(result.classified_educations) == 1
191
+ assert len(result.classified_work_experiences) == 1
192
 
193
+ work_experience = result.classified_work_experiences[0]
194
+ assert work_experience.work_experience_classification.secondary_job_type == SecondaryJobType.ENGINEERING
195
+ assert work_experience.investment_banking_classification is None
196
+ assert work_experience.investing_focus_asset_class_classification is None
197
+ assert work_experience.investing_focus_sector_classification is None
198
 
199
+ # ensure investment banking, investing focus asset class, and investing focus sector classifiers were not called
200
  mock_investment_banking_group_classifier.return_value.classify_investment_banking_group.assert_not_called()
201
+ mock_investing_focus_asset_class_classifier.return_value.classify_investing_focus_asset_class.assert_not_called()
202
+ mock_investing_focus_sector_classifier.return_value.classify_investing_focus_sector.assert_not_called()
203
 
204
 
205
  if __name__ == "__main__":