navkast
committed on
Investment focus - asset class, sector - classifiers (#4)
Browse files
* Investment focus asset class classifier
* Investment focus sector classifier
- src/notebooks/classifiers/work_experience/investment_focus_asset_class_classifier.ipynb +137 -0
- src/notebooks/classifiers/work_experience/investment_focus_sectors_classifier.ipynb +131 -0
- src/vsp/app/classifiers/work_experience/investing_focus_asset_class_classifier.py +176 -0
- src/vsp/app/classifiers/work_experience/investing_focus_sector_classifier.py +165 -0
- src/vsp/app/main.py +30 -1
- src/vsp/app/prompts/work_experience_classifier/investing_focus_asset_class/1 - investing_focus_asset_class_classifier_human.txt +12 -0
- src/vsp/app/prompts/work_experience_classifier/investing_focus_asset_class/1 - investing_focus_asset_class_classifier_system.txt +32 -0
- src/vsp/app/prompts/work_experience_classifier/investing_focus_sector/1 - investing_focus_sector_classifier_human.txt +11 -0
- src/vsp/app/prompts/work_experience_classifier/investing_focus_sector/1 - investing_focus_sector_classifier_system.txt +31 -0
- tests/vsp/app/test_main.py +103 -19
src/notebooks/classifiers/work_experience/investment_focus_asset_class_classifier.ipynb
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"import os\n",
|
| 10 |
+
"\n",
|
| 11 |
+
"os.getcwd()\n",
|
| 12 |
+
"os.chdir(path=os.getcwd() + \"/../../../\")"
|
| 13 |
+
]
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"cell_type": "code",
|
| 17 |
+
"execution_count": 2,
|
| 18 |
+
"metadata": {},
|
| 19 |
+
"outputs": [
|
| 20 |
+
{
|
| 21 |
+
"name": "stdout",
|
| 22 |
+
"output_type": "stream",
|
| 23 |
+
"text": [
|
| 24 |
+
"/Volumes/code/geometric/VSP-data-enrichment/src\n"
|
| 25 |
+
]
|
| 26 |
+
}
|
| 27 |
+
],
|
| 28 |
+
"source": [
|
| 29 |
+
"import json\n",
|
| 30 |
+
"\n",
|
| 31 |
+
"from vsp.app.model.linkedin.linkedin_models import LinkedinProfile\n",
|
| 32 |
+
"\n",
|
| 33 |
+
"print(os.getcwd())\n",
|
| 34 |
+
"\n",
|
| 35 |
+
"with open(\"notebooks/data/eric_armagost.json\") as f:\n",
|
| 36 |
+
" data = json.load(f)\n",
|
| 37 |
+
"\n",
|
| 38 |
+
"profile = LinkedinProfile.profile_from_json(data)"
|
| 39 |
+
]
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"cell_type": "code",
|
| 43 |
+
"execution_count": 3,
|
| 44 |
+
"metadata": {},
|
| 45 |
+
"outputs": [
|
| 46 |
+
{
|
| 47 |
+
"name": "stdout",
|
| 48 |
+
"output_type": "stream",
|
| 49 |
+
"text": [
|
| 50 |
+
"\u001b[2m2024-09-10 13:03:31\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
|
| 51 |
+
"\u001b[2m2024-09-10 13:03:31\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
|
| 52 |
+
"\u001b[2m2024-09-10 13:03:31\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
|
| 53 |
+
"\u001b[2m2024-09-10 13:03:31\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
|
| 54 |
+
"\u001b[2m2024-09-10 13:03:32\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
|
| 55 |
+
"\u001b[2m2024-09-10 13:03:33\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 56 |
+
"\u001b[2m2024-09-10 13:03:33\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1062\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m102\u001b[0m\n",
|
| 57 |
+
"\u001b[2m2024-09-10 13:03:33\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 58 |
+
"\u001b[2m2024-09-10 13:03:33\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1139\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m97\u001b[0m\n",
|
| 59 |
+
"\u001b[2m2024-09-10 13:03:33\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 60 |
+
"\u001b[2m2024-09-10 13:03:33\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1062\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m116\u001b[0m\n",
|
| 61 |
+
"\u001b[2m2024-09-10 13:03:33\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 62 |
+
"\u001b[2m2024-09-10 13:03:33\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1061\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m114\u001b[0m\n",
|
| 63 |
+
"{\n",
|
| 64 |
+
" \"investing_focus_asset_class\": \"Other\",\n",
|
| 65 |
+
" \"confidence\": 0.4,\n",
|
| 66 |
+
" \"reasoning\": \"The specific work experience at William Blair & Company is categorized as \\\"Investment Banking,\\\" which primarily focuses on advisory services rather than direct investments in asset classes like venture capital or private equity. The lack of detailed investment-related responsibilities in the description further supports this classification, leading to a lower confidence level.\",\n",
|
| 67 |
+
" \"other_description\": \"Investment banking typically involves advisory services for mergers and acquisitions, capital raising, and financial restructuring, rather than direct investments.\"\n",
|
| 68 |
+
"}\n",
|
| 69 |
+
"{\n",
|
| 70 |
+
" \"investing_focus_asset_class\": \"Private Equity / Buyouts\",\n",
|
| 71 |
+
" \"confidence\": 0.9,\n",
|
| 72 |
+
" \"reasoning\": \"The specific work experience at Accel-KKR indicates a focus on private equity investments, particularly in technology and software sectors. The description highlights typical transactions such as acquisitions, buyouts of divisions from public companies, and take-private transactions, which are all characteristic of private equity buyouts. This aligns well with the classification of PRIVATE_EQUITY_BUYOUTS.\",\n",
|
| 73 |
+
" \"other_description\": null\n",
|
| 74 |
+
"}\n",
|
| 75 |
+
"{\n",
|
| 76 |
+
" \"investing_focus_asset_class\": \"Other\",\n",
|
| 77 |
+
" \"confidence\": 0.4,\n",
|
| 78 |
+
" \"reasoning\": \"The specific work experience at Fidelity Investments as FFAS Corporate Finance does not provide clear evidence of a direct focus on any of the predefined investing categories. The role likely involved corporate finance activities, which may include advisory services rather than direct investment activities in the specified asset classes. Therefore, I classified it as OTHER, with a low confidence level due to the lack of detailed investment-related responsibilities in the description.\",\n",
|
| 79 |
+
" \"other_description\": \"Corporate finance within a financial services context, focusing on financial advisory and capital raising.\"\n",
|
| 80 |
+
"}\n",
|
| 81 |
+
"{\n",
|
| 82 |
+
" \"investing_focus_asset_class\": \"Other\",\n",
|
| 83 |
+
" \"confidence\": 0.4,\n",
|
| 84 |
+
" \"reasoning\": \"The specific work experience at Fidelity Investments as a PI Corporate Finance professional does not provide enough detail to classify it into one of the predefined investing focus or asset class categories. The lack of a description of responsibilities or specific investment activities makes it difficult to ascertain a clear focus. Therefore, I classified it as OTHER, reflecting its general corporate finance nature within the financial services sector.\",\n",
|
| 85 |
+
" \"other_description\": \"Corporate finance role focused on financial services, not directly tied to a specific investing focus or asset class.\"\n",
|
| 86 |
+
"}\n"
|
| 87 |
+
]
|
| 88 |
+
}
|
| 89 |
+
],
|
| 90 |
+
"source": [
|
| 91 |
+
"import asyncio\n",
|
| 92 |
+
"\n",
|
| 93 |
+
"from vsp.app.classifiers.work_experience.investing_focus_asset_class_classifier import (\n",
|
| 94 |
+
" InvestingFocusAssetClassClassifier,\n",
|
| 95 |
+
")\n",
|
| 96 |
+
"\n",
|
| 97 |
+
"investing_focus_asset_class_classifier = InvestingFocusAssetClassClassifier()\n",
|
| 98 |
+
"\n",
|
| 99 |
+
"all_positions_classified = []\n",
|
| 100 |
+
"\n",
|
| 101 |
+
"\n",
|
| 102 |
+
"async def classify_investing_focus_asset_class(profile, position):\n",
|
| 103 |
+
" classification = await investing_focus_asset_class_classifier.classify_investing_focus_asset_class(\n",
|
| 104 |
+
" profile, position\n",
|
| 105 |
+
" )\n",
|
| 106 |
+
" all_positions_classified.append(classification)\n",
|
| 107 |
+
"\n",
|
| 108 |
+
"\n",
|
| 109 |
+
"await asyncio.gather(*[classify_investing_focus_asset_class(profile, position) for position in profile.positions])\n",
|
| 110 |
+
"\n",
|
| 111 |
+
"for classification in all_positions_classified:\n",
|
| 112 |
+
" print(classification.model_dump_json(indent=2))"
|
| 113 |
+
]
|
| 114 |
+
}
|
| 115 |
+
],
|
| 116 |
+
"metadata": {
|
| 117 |
+
"kernelspec": {
|
| 118 |
+
"display_name": ".venv",
|
| 119 |
+
"language": "python",
|
| 120 |
+
"name": "python3"
|
| 121 |
+
},
|
| 122 |
+
"language_info": {
|
| 123 |
+
"codemirror_mode": {
|
| 124 |
+
"name": "ipython",
|
| 125 |
+
"version": 3
|
| 126 |
+
},
|
| 127 |
+
"file_extension": ".py",
|
| 128 |
+
"mimetype": "text/x-python",
|
| 129 |
+
"name": "python",
|
| 130 |
+
"nbconvert_exporter": "python",
|
| 131 |
+
"pygments_lexer": "ipython3",
|
| 132 |
+
"version": "3.12.5"
|
| 133 |
+
}
|
| 134 |
+
},
|
| 135 |
+
"nbformat": 4,
|
| 136 |
+
"nbformat_minor": 2
|
| 137 |
+
}
|
src/notebooks/classifiers/work_experience/investment_focus_sectors_classifier.ipynb
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"import os\n",
|
| 10 |
+
"\n",
|
| 11 |
+
"os.getcwd()\n",
|
| 12 |
+
"os.chdir(path=os.getcwd() + \"/../../../\")"
|
| 13 |
+
]
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"cell_type": "code",
|
| 17 |
+
"execution_count": 2,
|
| 18 |
+
"metadata": {},
|
| 19 |
+
"outputs": [
|
| 20 |
+
{
|
| 21 |
+
"name": "stdout",
|
| 22 |
+
"output_type": "stream",
|
| 23 |
+
"text": [
|
| 24 |
+
"/Volumes/code/geometric/VSP-data-enrichment/src\n"
|
| 25 |
+
]
|
| 26 |
+
}
|
| 27 |
+
],
|
| 28 |
+
"source": [
|
| 29 |
+
"import json\n",
|
| 30 |
+
"\n",
|
| 31 |
+
"from vsp.app.model.linkedin.linkedin_models import LinkedinProfile\n",
|
| 32 |
+
"\n",
|
| 33 |
+
"print(os.getcwd())\n",
|
| 34 |
+
"\n",
|
| 35 |
+
"with open(\"notebooks/data/eric_armagost.json\") as f:\n",
|
| 36 |
+
" data = json.load(f)\n",
|
| 37 |
+
"\n",
|
| 38 |
+
"profile = LinkedinProfile.profile_from_json(data)"
|
| 39 |
+
]
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"cell_type": "code",
|
| 43 |
+
"execution_count": 4,
|
| 44 |
+
"metadata": {},
|
| 45 |
+
"outputs": [
|
| 46 |
+
{
|
| 47 |
+
"name": "stdout",
|
| 48 |
+
"output_type": "stream",
|
| 49 |
+
"text": [
|
| 50 |
+
"\u001b[2m2024-09-10 14:00:22\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFetching parameter from Parameter Store\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
|
| 51 |
+
"\u001b[2m2024-09-10 14:00:22\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCreating boto3 client \u001b[0m \u001b[36mservice\u001b[0m=\u001b[35mssm\u001b[0m\n",
|
| 52 |
+
"\u001b[2m2024-09-10 14:00:22\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAssuming role \u001b[0m \u001b[36mrole_arn\u001b[0m=\u001b[35marn:aws:iam::339713096219:role/ReadWrite\u001b[0m\n",
|
| 53 |
+
"\u001b[2m2024-09-10 14:00:23\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRole assumed successfully \u001b[0m\n",
|
| 54 |
+
"\u001b[2m2024-09-10 14:00:23\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mSuccessfully fetched parameter\u001b[0m \u001b[36mparameter\u001b[0m=\u001b[35m/secrets/openai/api_key\u001b[0m\n",
|
| 55 |
+
"\u001b[2m2024-09-10 14:00:24\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 56 |
+
"\u001b[2m2024-09-10 14:00:24\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1138\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m92\u001b[0m\n",
|
| 57 |
+
"\u001b[2m2024-09-10 14:00:24\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 58 |
+
"\u001b[2m2024-09-10 14:00:24\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1061\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m99\u001b[0m\n",
|
| 59 |
+
"\u001b[2m2024-09-10 14:00:24\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 60 |
+
"\u001b[2m2024-09-10 14:00:24\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1060\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m95\u001b[0m\n",
|
| 61 |
+
"\u001b[2m2024-09-10 14:00:24\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOpenAI API called \u001b[0m \u001b[36mmodel\u001b[0m=\u001b[35mgpt-4o-mini\u001b[0m\n",
|
| 62 |
+
"\u001b[2m2024-09-10 14:00:24\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mToken usage \u001b[0m \u001b[36minput_tokens\u001b[0m=\u001b[35m1061\u001b[0m \u001b[36moutput_tokens\u001b[0m=\u001b[35m115\u001b[0m\n",
|
| 63 |
+
"{\n",
|
| 64 |
+
" \"investing_focus_sector\": \"Technology / Software / TMT\",\n",
|
| 65 |
+
" \"confidence\": 0.9,\n",
|
| 66 |
+
" \"reasoning\": \"The specific work experience at Accel-KKR clearly indicates a focus on investing in software and technology-enabled services companies. The description highlights that the firm is dedicated exclusively to this sector, which aligns with the TECHNOLOGY_SOFTWARE_TMT category. The candidate's role as an Investment Professional further supports this classification, as it involves transactions related to technology companies.\"\n",
|
| 67 |
+
"}\n",
|
| 68 |
+
"{\n",
|
| 69 |
+
" \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
|
| 70 |
+
" \"confidence\": 0.8,\n",
|
| 71 |
+
" \"reasoning\": \"The specific work experience at Fidelity Investments is clearly within the financial services sector, as indicated by the company name and industry classification. The role in FFAS Corporate Finance suggests involvement in financial analysis and corporate finance activities, which aligns with the focus on financial institutions. While the description lacks detail, the context of Fidelity Investments as a major player in financial services supports a strong classification in this sector.\"\n",
|
| 72 |
+
"}\n",
|
| 73 |
+
"{\n",
|
| 74 |
+
" \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
|
| 75 |
+
" \"confidence\": 0.8,\n",
|
| 76 |
+
" \"reasoning\": \"The specific work experience item is from Fidelity Investments, a company clearly identified within the financial services sector. The job title \\\"PI Corporate Finance\\\" suggests a focus on corporate finance activities, which are typically associated with financial institutions. While the description lacks detail, the context of the company and the role strongly indicate a focus on financial services, justifying a high confidence level.\"\n",
|
| 77 |
+
"}\n",
|
| 78 |
+
"{\n",
|
| 79 |
+
" \"investing_focus_sector\": \"Financial Institutions Group (FIG) / Fintech\",\n",
|
| 80 |
+
" \"confidence\": 0.8,\n",
|
| 81 |
+
" \"reasoning\": \"The specific work experience item indicates that the candidate worked in Investment Banking at William Blair & Company, which is categorized under Financial Services. This aligns with the focus on financial institutions, as investment banking involves services related to capital raising, mergers and acquisitions, and financial advisory, all of which are core functions of financial institutions. The absence of a detailed description does not detract significantly from this classification, as the job title and company industry provide strong evidence for the sector classification.\"\n",
|
| 82 |
+
"}\n"
|
| 83 |
+
]
|
| 84 |
+
}
|
| 85 |
+
],
|
| 86 |
+
"source": [
|
| 87 |
+
"import asyncio\n",
|
| 88 |
+
"\n",
|
| 89 |
+
"from vsp.app.classifiers.work_experience.investing_focus_sector_classifier import (\n",
|
| 90 |
+
" InvestingFocusSectorClassifier,\n",
|
| 91 |
+
")\n",
|
| 92 |
+
"\n",
|
| 93 |
+
"investing_focus_sector_classifier = InvestingFocusSectorClassifier()\n",
|
| 94 |
+
"\n",
|
| 95 |
+
"all_positions_classified = []\n",
|
| 96 |
+
"\n",
|
| 97 |
+
"\n",
|
| 98 |
+
"async def classify_investing_focus_sectors(profile, position):\n",
|
| 99 |
+
" classification = await investing_focus_sector_classifier.classify_investing_focus_sector(profile, position)\n",
|
| 100 |
+
" all_positions_classified.append(classification)\n",
|
| 101 |
+
"\n",
|
| 102 |
+
"\n",
|
| 103 |
+
"await asyncio.gather(*[classify_investing_focus_sectors(profile, position) for position in profile.positions])\n",
|
| 104 |
+
"\n",
|
| 105 |
+
"for classification in all_positions_classified:\n",
|
| 106 |
+
" print(classification.model_dump_json(indent=2))"
|
| 107 |
+
]
|
| 108 |
+
}
|
| 109 |
+
],
|
| 110 |
+
"metadata": {
|
| 111 |
+
"kernelspec": {
|
| 112 |
+
"display_name": ".venv",
|
| 113 |
+
"language": "python",
|
| 114 |
+
"name": "python3"
|
| 115 |
+
},
|
| 116 |
+
"language_info": {
|
| 117 |
+
"codemirror_mode": {
|
| 118 |
+
"name": "ipython",
|
| 119 |
+
"version": 3
|
| 120 |
+
},
|
| 121 |
+
"file_extension": ".py",
|
| 122 |
+
"mimetype": "text/x-python",
|
| 123 |
+
"name": "python",
|
| 124 |
+
"nbconvert_exporter": "python",
|
| 125 |
+
"pygments_lexer": "ipython3",
|
| 126 |
+
"version": "3.12.5"
|
| 127 |
+
}
|
| 128 |
+
},
|
| 129 |
+
"nbformat": 4,
|
| 130 |
+
"nbformat_minor": 2
|
| 131 |
+
}
|
src/vsp/app/classifiers/work_experience/investing_focus_asset_class_classifier.py
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
investing_focus_asset_class_classifier.py
|
| 3 |
+
|
| 4 |
+
This module provides functionality for classifying investing focus and asset classes
|
| 5 |
+
based on work experiences from LinkedIn profiles. It uses a language model to
|
| 6 |
+
determine the specific investing focus or asset class a position belongs to.
|
| 7 |
+
|
| 8 |
+
Classes:
|
| 9 |
+
InvestingFocusAssetClass: Enum representing different investing focus and asset classes.
|
| 10 |
+
InvestingFocusAssetClassClassification: Pydantic model for classification results.
|
| 11 |
+
InvestingFocusAssetClassClassifier: Main class for classifying investing focus and asset classes.
|
| 12 |
+
|
| 13 |
+
Usage:
|
| 14 |
+
classifier = InvestingFocusAssetClassClassifier()
|
| 15 |
+
classification = await classifier.classify_investing_focus_asset_class(linkedin_profile, work_experience)
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
from enum import Enum
|
| 19 |
+
from typing import Any, Final
|
| 20 |
+
|
| 21 |
+
from pydantic import BaseModel, Field
|
| 22 |
+
|
| 23 |
+
from vsp.app import bindings
|
| 24 |
+
from vsp.app.model.linkedin.linkedin_formatters import format_position, format_profile_as_resume
|
| 25 |
+
from vsp.app.model.linkedin.linkedin_models import LinkedinProfile, Position
|
| 26 |
+
from vsp.app.prompts.prompt_loader import PromptLoader
|
| 27 |
+
from vsp.llm.llm_service import LLMService
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class InvestingFocusAssetClass(str, Enum):
    """Enumeration of different investing focus and asset classes.

    Each member's value is the human-readable label for the category. Because
    the enum also derives from ``str``, members compare equal to their label
    strings and serialize as plain strings.
    """

    # Venture capital, by stage.
    EARLY_STAGE_VC = "Early-stage VC"
    LATE_STAGE_VC = "Late-stage VC"
    MULTI_STAGE_VC = "Multi-stage VC"

    # Other equity-oriented categories.
    GROWTH_EQUITY = "Growth Equity"
    PRE_IPO = "Pre-IPO"
    PUBLIC_EQUITIES = "Public Equities"
    REAL_ESTATE = "Real Estate"
    PRIVATE_EQUITY_BUYOUTS = "Private Equity / Buyouts"
    HEDGE_FUND = "Hedge Fund"
    CREDIT = "Credit"
    SECONDARIES = "Secondaries"

    # Catch-all for positions that fit none of the categories above.
    OTHER = "Other"
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
# Lookup table keyed by enum member NAME (e.g. "GROWTH_EQUITY"), not by the
# human-readable value. The classifier's output parser upper-cases the label
# it receives and resolves it through this table.
_INVESTING_FOCUS_ASSET_CLASS_MAPPINGS: Final[dict[str, InvestingFocusAssetClass]] = {
    focus.name: focus for focus in InvestingFocusAssetClass
}
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class InvestingFocusAssetClassClassification(BaseModel):
    """
    Pydantic model representing the classification result for an investing focus or asset class.

    Attributes:
        investing_focus_asset_class (InvestingFocusAssetClass): The classified investing focus or asset class.
        confidence (float): Confidence level of the classification, between 0.0 and 1.0
            (enforced by the ``ge``/``le`` constraints on the field).
        reasoning (str): Explanation for the classification decision.
        other_description (str | None): Description for 'Other' classification, if applicable.
            Defaults to ``None``.
    """

    investing_focus_asset_class: InvestingFocusAssetClass = Field(description="The investing focus or asset class")
    confidence: float = Field(description="Confidence level between 0.0 and 1.0", ge=0.0, le=1.0)
    reasoning: str = Field(description="Explanation for the classification")
    other_description: str | None = Field(default=None, description="Description for 'Other' classification")

    model_config = {"frozen": True}  # This makes the model immutable and hashable
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
class InvestingFocusAssetClassClassifier:
    """
    A class for classifying investing focus and asset classes based on work experiences from LinkedIn profiles.

    This classifier uses a language model to determine the specific investing focus or asset class
    a position belongs to based on the information provided in a LinkedIn profile and specific work experience.

    Attributes:
        _llm_service (LLMService): The language model service used for classification.
        _prompt_template (Any): The template for generating prompts for the language model,
            as returned by ``prompt_loader.load_template``.
        _prompt_loader (PromptLoader): The loader for prompt templates.
    """

    def __init__(
        self, llm_service: LLMService = bindings.open_ai_service, prompt_loader: PromptLoader = bindings.prompt_loader
    ) -> None:
        """
        Initialize the InvestingFocusAssetClassClassifier.

        Args:
            llm_service (LLMService, optional): The language model service to use.
                Defaults to the OpenAI service defined in bindings.
            prompt_loader (PromptLoader, optional): The prompt loader to use.
                Defaults to the prompt loader defined in bindings.
        """
        self._llm_service = llm_service
        self._prompt_template = prompt_loader.load_template(
            "work_experience_classifier/investing_focus_asset_class/1 - investing_focus_asset_class_classifier"
        )
        self._prompt_loader = prompt_loader

    @staticmethod
    def _parse_output(output: str) -> InvestingFocusAssetClassClassification:
        """
        Parse the output from the language model into an InvestingFocusAssetClassClassification object.

        The output is read as one ``key: value`` pair per line, split on the first
        colon only so that values may themselves contain colons. Blank lines are
        ignored. The keys consumed are ``investing_focus_asset_class``,
        ``confidence``, ``reasoning`` and, for the OTHER category only,
        ``other_description``; any other keys are ignored.

        Args:
            output (str): The raw output string from the language model.

        Returns:
            InvestingFocusAssetClassClassification: A structured representation of the classification result.

        Raises:
            ValueError: If a non-blank line has no ``:`` separator, if the output contains an
                unknown investing focus or asset class, or if the confidence value is not a number.
            KeyError: If ``investing_focus_asset_class``, ``confidence`` or ``reasoning`` is missing.
        """
        parsed: dict[str, Any] = {}
        for line in output.strip().split("\n"):
            if not line.strip():
                # Models often separate fields with blank lines; they carry no data.
                continue
            key, separator, value = line.partition(":")
            if not separator:
                raise ValueError(f"Malformed classifier output line (expected 'key: value'): {line!r}")
            parsed[key.strip()] = value.strip()

        # The mapping is keyed by enum member names, which are upper-case.
        investing_focus_asset_class_str = parsed["investing_focus_asset_class"].upper()

        try:
            investing_focus_asset_class = _INVESTING_FOCUS_ASSET_CLASS_MAPPINGS[investing_focus_asset_class_str]
        except KeyError as e:
            raise ValueError(f"Unknown investing focus or asset class: {str(e)}") from e

        try:
            confidence = float(parsed["confidence"])
        except ValueError as e:
            raise ValueError(f"Invalid confidence value: {parsed['confidence']}") from e

        fields: dict[str, Any] = {
            "investing_focus_asset_class": investing_focus_asset_class,
            "confidence": confidence,
            "reasoning": parsed["reasoning"],
        }
        # other_description is only meaningful for the OTHER category; for every
        # other category it is left unset even if the model supplied one.
        if investing_focus_asset_class == InvestingFocusAssetClass.OTHER and "other_description" in parsed:
            fields["other_description"] = parsed["other_description"]

        return InvestingFocusAssetClassClassification(**fields)

    async def classify_investing_focus_asset_class(
        self, linkedin_profile: LinkedinProfile, work_experience: Position
    ) -> InvestingFocusAssetClassClassification:
        """
        Classify a single work experience item from a LinkedIn profile into an investing focus or asset class.

        This method builds a prompt from the stored template, supplying the whole
        profile formatted as a resume and the single position formatted on its own,
        registers ``_parse_output`` as the output formatter, and awaits the prompt's
        evaluation.

        Args:
            linkedin_profile (LinkedinProfile): The full LinkedIn profile of the individual.
            work_experience (Position): The specific work experience item to classify.

        Returns:
            InvestingFocusAssetClassClassification: The classification result for the work experience item.

        Raises:
            ValueError: Presumably when prompt evaluation or output parsing fails; the
                evaluation path is not visible in this module, so confirm against
                ``PromptLoader.create_prompt`` / ``evaluate``.
        """
        prompt = self._prompt_loader.create_prompt(
            self._prompt_template,
            llm_service=self._llm_service,
            output_formatter=self._parse_output,
            resume=format_profile_as_resume(linkedin_profile),
            work_experience=format_position(work_experience),
        )
        return await prompt.evaluate()  # type: ignore
|
src/vsp/app/classifiers/work_experience/investing_focus_sector_classifier.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
investing_focus_sector_classifier.py
|
| 3 |
+
|
| 4 |
+
This module provides functionality for classifying investing focus sectors
|
| 5 |
+
based on work experiences from LinkedIn profiles. It uses a language model to
|
| 6 |
+
determine the specific sector a position focuses on in terms of investments.
|
| 7 |
+
|
| 8 |
+
Classes:
|
| 9 |
+
InvestingFocusSector: Enum representing different investing focus sectors.
|
| 10 |
+
InvestingFocusSectorClassification: Pydantic model for classification results.
|
| 11 |
+
InvestingFocusSectorClassifier: Main class for classifying investing focus sectors.
|
| 12 |
+
|
| 13 |
+
Usage:
|
| 14 |
+
classifier = InvestingFocusSectorClassifier()
|
| 15 |
+
classification = await classifier.classify_investing_focus_sector(linkedin_profile, work_experience)
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
from enum import Enum
|
| 19 |
+
from typing import Any, Final
|
| 20 |
+
|
| 21 |
+
from pydantic import BaseModel, Field
|
| 22 |
+
|
| 23 |
+
from vsp.app import bindings
|
| 24 |
+
from vsp.app.model.linkedin.linkedin_formatters import format_position, format_profile_as_resume
|
| 25 |
+
from vsp.app.model.linkedin.linkedin_models import LinkedinProfile, Position
|
| 26 |
+
from vsp.app.prompts.prompt_loader import PromptLoader
|
| 27 |
+
from vsp.llm.llm_service import LLMService
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class InvestingFocusSector(str, Enum):
    """
    Closed set of sectors an investing role can focus on.

    Each member is also a ``str``. The member *name* (e.g. ``CONSUMER_RETAIL``) is the
    identifier used to look a sector up from the language model's answer, while the
    member *value* is the human-readable label for that sector.
    """

    # Catch-all for investors without a sector specialisation.
    GENERALIST = "Generalist"

    # Sector-specific focuses.
    HEALTHCARE = "Healthcare"
    INDUSTRIALS = "Industrials"
    BUSINESS_SERVICES = "Business Services"
    CONSUMER_RETAIL = "Consumer & Retail"
    ENERGY_NATURAL_RESOURCES = "Energy / Natural Resources / Cleantech / Utilities"
    REAL_ESTATE_GAMING_LODGING = "Real Estate, Gaming & Lodging"
    TECHNOLOGY_SOFTWARE_TMT = "Technology / Software / TMT"
    MEDIA_ENTERTAINMENT = "Media & Entertainment"
    FINANCIAL_INSTITUTIONS = "Financial Institutions Group (FIG) / Fintech"
    INFRASTRUCTURE_TRANSPORTATION = "Infrastructure / Transportation"

    # Anything that does not fit one of the sectors above.
    OTHER = "Other"
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
# Lookup table from enum member name (e.g. "HEALTHCARE") to the enum member.
# The enum declares no aliases, so ``__members__`` holds exactly one entry per member,
# in definition order.
_INVESTING_FOCUS_SECTOR_MAPPINGS: Final[dict[str, InvestingFocusSector]] = dict(InvestingFocusSector.__members__)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class InvestingFocusSectorClassification(BaseModel):
    """
    Result of classifying one work experience by investing focus sector.

    Attributes:
        investing_focus_sector (InvestingFocusSector): The sector chosen for the position.
        confidence (float): Confidence in the classification; constrained to 0.0 <= value <= 1.0.
        reasoning (str): Free-text explanation of why the sector was chosen.
    """

    investing_focus_sector: InvestingFocusSector = Field(description="The investing focus sector")
    confidence: float = Field(ge=0.0, le=1.0, description="Confidence level between 0.0 and 1.0")
    reasoning: str = Field(description="Explanation for the classification")

    # Frozen models cannot be modified after creation, which also makes them hashable.
    model_config = {"frozen": True}
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
class InvestingFocusSectorClassifier:
    """
    Classify the investing focus sector of a single LinkedIn work experience.

    The classifier builds a prompt from the full profile (formatted as a resume) and
    one specific position, evaluates it through a language model service, and parses
    the model's ``key: value`` answer into an InvestingFocusSectorClassification.

    Attributes:
        _llm_service (LLMService): The language model service used for classification.
        _prompt_template (Any): The template loaded for the sector classification prompt.
        _prompt_loader (PromptLoader): The loader used to load the template and create prompts.
    """

    def __init__(
        self, llm_service: LLMService = bindings.open_ai_service, prompt_loader: PromptLoader = bindings.prompt_loader
    ) -> None:
        """
        Initialize the InvestingFocusSectorClassifier.

        Args:
            llm_service (LLMService, optional): The language model service to use.
                Defaults to the OpenAI service defined in bindings.
            prompt_loader (PromptLoader, optional): The prompt loader to use.
                Defaults to the prompt loader defined in bindings.
        """
        self._llm_service = llm_service
        self._prompt_loader = prompt_loader
        # The template is loaded once here and reused for every classification call.
        self._prompt_template = prompt_loader.load_template(
            "work_experience_classifier/investing_focus_sector/1 - investing_focus_sector_classifier"
        )

    @staticmethod
    def _parse_output(output: str) -> InvestingFocusSectorClassification:
        """
        Parse the raw language model output into an InvestingFocusSectorClassification.

        The expected output is one ``field: value`` line for each of
        ``investing_focus_sector``, ``confidence`` and ``reasoning``. The parser is
        tolerant of common deviations: blank lines and text before the first
        recognised field are ignored, and lines following ``reasoning`` that do not
        start a recognised field are treated as a continuation of the reasoning.

        Args:
            output (str): The raw output string from the language model.

        Returns:
            InvestingFocusSectorClassification: A structured representation of the classification result.

        Raises:
            ValueError: If a required field is missing, the sector name is unknown,
                or the confidence value is not a number.
        """
        expected_fields = ("investing_focus_sector", "confidence", "reasoning")
        parsed: dict[str, Any] = {}
        current_field: str | None = None

        for raw_line in output.strip().splitlines():
            line = raw_line.strip()
            if not line:
                # Models frequently separate fields with empty lines.
                continue

            key, separator, value = line.partition(":")
            key = key.strip()
            if separator and key in expected_fields:
                parsed[key] = value.strip()
                current_field = key
            elif current_field == "reasoning":
                # Multi-line reasoning: keep the extra text instead of failing on it
                # (it may legitimately contain colons of its own).
                parsed["reasoning"] = f"{parsed['reasoning']}\n{line}".strip()
            # Any other stray line (e.g. a preamble) carries no field and is ignored.

        missing_fields = [field for field in expected_fields if field not in parsed]
        if missing_fields:
            raise ValueError(f"Missing field(s) in classifier output: {', '.join(missing_fields)}")

        # The model is asked for enum member names; upper-casing makes the lookup case-insensitive.
        investing_focus_sector_str = parsed["investing_focus_sector"].upper()

        try:
            investing_focus_sector = _INVESTING_FOCUS_SECTOR_MAPPINGS[investing_focus_sector_str]
        except KeyError as e:
            raise ValueError(f"Unknown investing focus sector: {str(e)}") from e

        try:
            confidence = float(parsed["confidence"])
        except ValueError as e:
            raise ValueError(f"Invalid confidence value: {parsed['confidence']}") from e

        return InvestingFocusSectorClassification(
            investing_focus_sector=investing_focus_sector,
            confidence=confidence,
            reasoning=parsed["reasoning"],
        )

    async def classify_investing_focus_sector(
        self, linkedin_profile: LinkedinProfile, work_experience: Position
    ) -> InvestingFocusSectorClassification:
        """
        Classify a single work experience item from a LinkedIn profile into an investing focus sector.

        The profile is formatted as a resume and the position is formatted on its own;
        both are substituted into the prompt template, and the prompt is evaluated with
        ``_parse_output`` registered as the output formatter.

        Args:
            linkedin_profile (LinkedinProfile): The full LinkedIn profile of the individual.
            work_experience (Position): The specific work experience item to classify.

        Returns:
            InvestingFocusSectorClassification: The classification result for the work experience item.

        Raises:
            ValueError: Presumably propagated from ``_parse_output`` when the model's answer
                cannot be parsed; this assumes ``prompt.evaluate()`` applies the output
                formatter without swallowing its errors (TODO confirm against the prompt implementation).
        """
        prompt = self._prompt_loader.create_prompt(
            self._prompt_template,
            llm_service=self._llm_service,
            output_formatter=self._parse_output,
            resume=format_profile_as_resume(linkedin_profile),
            work_experience=format_position(work_experience),
        )
        return await prompt.evaluate()  # type: ignore
|
src/vsp/app/main.py
CHANGED
|
@@ -40,6 +40,14 @@ from vsp.app.classifiers.work_experience.general_work_experience_classifier impo
|
|
| 40 |
WorkExperienceClassification,
|
| 41 |
WorkExperienceClassifier,
|
| 42 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
from vsp.app.classifiers.work_experience.investment_banking_group_classifier import (
|
| 44 |
InvestmentBankingGroupClassification,
|
| 45 |
InvestmentBankingGroupClassifier,
|
|
@@ -69,11 +77,17 @@ class ClassifiedWorkExperience(BaseModel):
|
|
| 69 |
work_experience_classification (WorkExperienceClassification): The general work experience classification.
|
| 70 |
investment_banking_classification (InvestmentBankingGroupClassification | None):
|
| 71 |
The investment banking group classification, if applicable.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
"""
|
| 73 |
|
| 74 |
position: Position
|
| 75 |
work_experience_classification: WorkExperienceClassification
|
| 76 |
investment_banking_classification: InvestmentBankingGroupClassification | None = None
|
|
|
|
|
|
|
| 77 |
|
| 78 |
|
| 79 |
class LinkedinProfileClassificationResults(BaseModel):
|
|
@@ -98,6 +112,8 @@ async def process_linkedin_profile(profile: LinkedinProfile) -> LinkedinProfileC
|
|
| 98 |
2. Classifies all work experience items in the profile.
|
| 99 |
3. For work experiences classified as investment banking, performs an additional
|
| 100 |
investment banking group classification.
|
|
|
|
|
|
|
| 101 |
|
| 102 |
Args:
|
| 103 |
profile (LinkedinProfile): The LinkedIn profile to process.
|
|
@@ -108,6 +124,8 @@ async def process_linkedin_profile(profile: LinkedinProfile) -> LinkedinProfileC
|
|
| 108 |
education_classifier = EducationClassifier()
|
| 109 |
work_experience_classifier = WorkExperienceClassifier()
|
| 110 |
investment_banking_classifier = InvestmentBankingGroupClassifier()
|
|
|
|
|
|
|
| 111 |
|
| 112 |
# Classify educations
|
| 113 |
education_tasks = [education_classifier.classify_education(profile, education) for education in profile.educations]
|
|
@@ -119,7 +137,7 @@ async def process_linkedin_profile(profile: LinkedinProfile) -> LinkedinProfileC
|
|
| 119 |
]
|
| 120 |
work_experience_classifications = await asyncio.gather(*work_experience_tasks)
|
| 121 |
|
| 122 |
-
# Classify investment banking groups for relevant positions
|
| 123 |
classified_work_experiences = []
|
| 124 |
for position, work_classification in zip(profile.positions, work_experience_classifications):
|
| 125 |
classified_work_experience = ClassifiedWorkExperience(
|
|
@@ -130,6 +148,17 @@ async def process_linkedin_profile(profile: LinkedinProfile) -> LinkedinProfileC
|
|
| 130 |
ib_classification = await investment_banking_classifier.classify_investment_banking_group(profile, position)
|
| 131 |
classified_work_experience.investment_banking_classification = ib_classification
|
| 132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
classified_work_experiences.append(classified_work_experience)
|
| 134 |
|
| 135 |
# Prepare the results using Pydantic models
|
|
|
|
| 40 |
WorkExperienceClassification,
|
| 41 |
WorkExperienceClassifier,
|
| 42 |
)
|
| 43 |
+
from vsp.app.classifiers.work_experience.investing_focus_asset_class_classifier import (
|
| 44 |
+
InvestingFocusAssetClassClassification,
|
| 45 |
+
InvestingFocusAssetClassClassifier,
|
| 46 |
+
)
|
| 47 |
+
from vsp.app.classifiers.work_experience.investing_focus_sector_classifier import (
|
| 48 |
+
InvestingFocusSectorClassification,
|
| 49 |
+
InvestingFocusSectorClassifier,
|
| 50 |
+
)
|
| 51 |
from vsp.app.classifiers.work_experience.investment_banking_group_classifier import (
|
| 52 |
InvestmentBankingGroupClassification,
|
| 53 |
InvestmentBankingGroupClassifier,
|
|
|
|
| 77 |
work_experience_classification (WorkExperienceClassification): The general work experience classification.
|
| 78 |
investment_banking_classification (InvestmentBankingGroupClassification | None):
|
| 79 |
The investment banking group classification, if applicable.
|
| 80 |
+
investing_focus_asset_class_classification (InvestingFocusAssetClassClassification | None):
|
| 81 |
+
The investing focus or asset class classification, if applicable.
|
| 82 |
+
investing_focus_sector_classification (InvestingFocusSectorClassification | None):
|
| 83 |
+
The investing focus sector classification, if applicable.
|
| 84 |
"""
|
| 85 |
|
| 86 |
position: Position
|
| 87 |
work_experience_classification: WorkExperienceClassification
|
| 88 |
investment_banking_classification: InvestmentBankingGroupClassification | None = None
|
| 89 |
+
investing_focus_asset_class_classification: InvestingFocusAssetClassClassification | None = None
|
| 90 |
+
investing_focus_sector_classification: InvestingFocusSectorClassification | None = None
|
| 91 |
|
| 92 |
|
| 93 |
class LinkedinProfileClassificationResults(BaseModel):
|
|
|
|
| 112 |
2. Classifies all work experience items in the profile.
|
| 113 |
3. For work experiences classified as investment banking, performs an additional
|
| 114 |
investment banking group classification.
|
| 115 |
+
4. For work experiences classified as investing, performs an additional
|
| 116 |
+
investing focus or asset class classification.
|
| 117 |
|
| 118 |
Args:
|
| 119 |
profile (LinkedinProfile): The LinkedIn profile to process.
|
|
|
|
| 124 |
education_classifier = EducationClassifier()
|
| 125 |
work_experience_classifier = WorkExperienceClassifier()
|
| 126 |
investment_banking_classifier = InvestmentBankingGroupClassifier()
|
| 127 |
+
investing_focus_asset_class_classifier = InvestingFocusAssetClassClassifier()
|
| 128 |
+
investing_focus_sector_classifier = InvestingFocusSectorClassifier()
|
| 129 |
|
| 130 |
# Classify educations
|
| 131 |
education_tasks = [education_classifier.classify_education(profile, education) for education in profile.educations]
|
|
|
|
| 137 |
]
|
| 138 |
work_experience_classifications = await asyncio.gather(*work_experience_tasks)
|
| 139 |
|
| 140 |
+
# Classify investment banking groups and investing focus/asset classes for relevant positions
|
| 141 |
classified_work_experiences = []
|
| 142 |
for position, work_classification in zip(profile.positions, work_experience_classifications):
|
| 143 |
classified_work_experience = ClassifiedWorkExperience(
|
|
|
|
| 148 |
ib_classification = await investment_banking_classifier.classify_investment_banking_group(profile, position)
|
| 149 |
classified_work_experience.investment_banking_classification = ib_classification
|
| 150 |
|
| 151 |
+
if work_classification.secondary_job_type == SecondaryJobType.INVESTING:
|
| 152 |
+
investing_asset_class_classification = (
|
| 153 |
+
await investing_focus_asset_class_classifier.classify_investing_focus_asset_class(profile, position)
|
| 154 |
+
)
|
| 155 |
+
classified_work_experience.investing_focus_asset_class_classification = investing_asset_class_classification
|
| 156 |
+
|
| 157 |
+
investing_sector_classification = await investing_focus_sector_classifier.classify_investing_focus_sector(
|
| 158 |
+
profile, position
|
| 159 |
+
)
|
| 160 |
+
classified_work_experience.investing_focus_sector_classification = investing_sector_classification
|
| 161 |
+
|
| 162 |
classified_work_experiences.append(classified_work_experience)
|
| 163 |
|
| 164 |
# Prepare the results using Pydantic models
|
src/vsp/app/prompts/work_experience_classifier/investing_focus_asset_class/1 - investing_focus_asset_class_classifier_human.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Please classify the following work experience item based on the job candidate's full resume and the specific work experience information provided from their LinkedIn profile. Analyze both sources of information carefully to determine the most accurate classification for the investing focus or asset class.
|
| 2 |
+
|
| 3 |
+
Full Resume:
|
| 4 |
+
{resume}
|
| 5 |
+
|
| 6 |
+
Specific Work Experience Item:
|
| 7 |
+
{work_experience}
|
| 8 |
+
|
| 9 |
+
Provide your classification for the investing focus or asset class OF THIS SPECIFIC WORK EXPERIENCE, along with your confidence level (0.0 to 1.0) and reasoning in the specified format. Ensure your reasoning refers to specific details from this specific work experience item that support your decision. If you select OTHER, provide a brief description of the focus or asset class in the other_description field.
|
| 10 |
+
|
| 11 |
+
NOTE: The candidate may have changed jobs, and therefore, investing focus. Do not rely on the most recent job, or the most detailed job description, in making your classification. If there is no supporting evidence to prove your classification for THIS specific work experience on a standalone basis, LOWER your confidence level TO BELOW 0.5.
|
| 12 |
+
|
src/vsp/app/prompts/work_experience_classifier/investing_focus_asset_class/1 - investing_focus_asset_class_classifier_system.txt
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are an expert in analyzing professional work experiences and classifying them according to specific investing focus and asset classes. Your task is to examine a job candidate's full resume and a specific work experience item from their LinkedIn profile, then classify the work experience into one of the predefined investing focus or asset class categories.
|
| 2 |
+
|
| 3 |
+
Use the provided information carefully to make an accurate classification. Pay close attention to job titles, responsibilities, company descriptions, and any other relevant information provided in both the resume and the specific work experience item.
|
| 4 |
+
|
| 5 |
+
Investing Focus / Asset Class categories:
|
| 6 |
+
1. EARLY_STAGE_VC: Early-stage venture capital investments
|
| 7 |
+
2. LATE_STAGE_VC: Late-stage venture capital investments
|
| 8 |
+
3. MULTI_STAGE_VC: Venture capital investments across multiple stages
|
| 9 |
+
4. GROWTH_EQUITY: Growth equity investments
|
| 10 |
+
5. PRE_IPO: Pre-IPO investments
|
| 11 |
+
6. PUBLIC_EQUITIES: Investments in publicly traded equities
|
| 12 |
+
7. REAL_ESTATE: Real estate investments
|
| 13 |
+
8. PRIVATE_EQUITY_BUYOUTS: Private equity and buyout investments
|
| 14 |
+
9. HEDGE_FUND: Hedge fund investments
|
| 15 |
+
10. CREDIT: Credit investments
|
| 16 |
+
11. SECONDARIES: Secondary market investments
|
| 17 |
+
12. OTHER: Any focus or asset class that doesn't fit the above categories
|
| 18 |
+
|
| 19 |
+
Provide your response in the following format exactly:
|
| 20 |
+
|
| 21 |
+
investing_focus_asset_class: [ONE_OF_THE_ABOVE_CATEGORIES]
|
| 22 |
+
other_description: [Only if OTHER is selected, provide a brief description]
|
| 23 |
+
confidence: [0.0 to 1.0]
|
| 24 |
+
reasoning: [Your explanation here]
|
| 25 |
+
|
| 26 |
+
Ensure each part of your response is on a separate line, exactly as shown above. There should be only three or four lines (depending on whether OTHER is selected).
|
| 27 |
+
|
| 28 |
+
The investing_focus_asset_class must be one of: EARLY_STAGE_VC, LATE_STAGE_VC, MULTI_STAGE_VC, GROWTH_EQUITY, PRE_IPO, PUBLIC_EQUITIES, REAL_ESTATE, PRIVATE_EQUITY_BUYOUTS, HEDGE_FUND, CREDIT, SECONDARIES, OTHER.
|
| 29 |
+
|
| 30 |
+
Your confidence level should reflect how certain you are about your classification based on the information provided.
|
| 31 |
+
|
| 32 |
+
In your reasoning, briefly explain why you chose this classification, referencing specific details from the resume and work experience item that support your decision. If you select OTHER, provide a brief description of the focus or asset class in the other_description field.
|
src/vsp/app/prompts/work_experience_classifier/investing_focus_sector/1 - investing_focus_sector_classifier_human.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Please classify the following work experience item based on the job candidate's full resume and the specific work experience information provided from their LinkedIn profile. Analyze both sources of information carefully to determine the most accurate classification for the investing focus sector.
|
| 2 |
+
|
| 3 |
+
Full Resume:
|
| 4 |
+
{resume}
|
| 5 |
+
|
| 6 |
+
Specific Work Experience Item:
|
| 7 |
+
{work_experience}
|
| 8 |
+
|
| 9 |
+
Provide your classification for the investing focus sector OF THIS SPECIFIC WORK EXPERIENCE, along with your confidence level (0.0 to 1.0) and reasoning in the specified format. Ensure your reasoning refers to specific details from this specific work experience item that support your decision.
|
| 10 |
+
|
| 11 |
+
NOTE: The candidate may have changed jobs, and therefore, investing focus sectors. Do not rely on the most recent job, or the most detailed job description, in making your classification. If there is no supporting evidence to prove your classification for THIS specific work experience on a standalone basis, LOWER your confidence level TO BELOW 0.5.
|
src/vsp/app/prompts/work_experience_classifier/investing_focus_sector/1 - investing_focus_sector_classifier_system.txt
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are an expert in analyzing professional work experiences and classifying them according to specific investing focus sectors. Your task is to examine a job candidate's full resume and a specific work experience item from their LinkedIn profile, then classify the work experience into one of the predefined investing focus sector categories.
|
| 2 |
+
|
| 3 |
+
Use the provided information carefully to make an accurate classification. Pay close attention to job titles, responsibilities, company descriptions, and any other relevant information provided in both the resume and the specific work experience item.
|
| 4 |
+
|
| 5 |
+
Investing Focus Sector categories:
|
| 6 |
+
1. GENERALIST: A broad focus across multiple sectors
|
| 7 |
+
2. HEALTHCARE: Focus on healthcare, biotech, pharmaceuticals, and medical devices
|
| 8 |
+
3. INDUSTRIALS: Focus on manufacturing, aerospace, defense, and other industrial sectors
|
| 9 |
+
4. BUSINESS_SERVICES: Focus on professional services, consulting, and B2B companies
|
| 10 |
+
5. CONSUMER_RETAIL: Focus on consumer goods, retail, and e-commerce
|
| 11 |
+
6. ENERGY_NATURAL_RESOURCES: Focus on energy, natural resources, cleantech, and utilities
|
| 12 |
+
7. REAL_ESTATE_GAMING_LODGING: Focus on real estate, gaming, casinos, and hospitality
|
| 13 |
+
8. TECHNOLOGY_SOFTWARE_TMT: Focus on technology, software, and telecom
|
| 14 |
+
9. MEDIA_ENTERTAINMENT: Focus on media, entertainment, and content creation
|
| 15 |
+
10. FINANCIAL_INSTITUTIONS: Focus on banks, insurance, fintech, and other financial services
|
| 16 |
+
11. INFRASTRUCTURE_TRANSPORTATION: Focus on infrastructure projects and transportation
|
| 17 |
+
12. OTHER: Any focus that doesn't fit the above categories
|
| 18 |
+
|
| 19 |
+
Provide your response in the following format exactly:
|
| 20 |
+
|
| 21 |
+
investing_focus_sector: [ONE_OF_THE_ABOVE_CATEGORIES]
|
| 22 |
+
confidence: [0.0 to 1.0]
|
| 23 |
+
reasoning: [Your explanation here]
|
| 24 |
+
|
| 25 |
+
Ensure each part of your response is on a separate line, exactly as shown above. There should be exactly three lines.
|
| 26 |
+
|
| 27 |
+
The investing_focus_sector must be one of: GENERALIST, HEALTHCARE, INDUSTRIALS, BUSINESS_SERVICES, CONSUMER_RETAIL, ENERGY_NATURAL_RESOURCES, REAL_ESTATE_GAMING_LODGING, TECHNOLOGY_SOFTWARE_TMT, MEDIA_ENTERTAINMENT, FINANCIAL_INSTITUTIONS, INFRASTRUCTURE_TRANSPORTATION, OTHER.
|
| 28 |
+
|
| 29 |
+
Your confidence level should reflect how certain you are about your classification based on the information provided.
|
| 30 |
+
|
| 31 |
+
In your reasoning, briefly explain why you chose this classification, referencing specific details from the resume and work experience item that support your decision.
|
tests/vsp/app/test_main.py
CHANGED
|
@@ -8,6 +8,14 @@ from vsp.app.classifiers.work_experience.general_work_experience_classifier impo
|
|
| 8 |
SecondaryJobType,
|
| 9 |
WorkExperienceClassification,
|
| 10 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
from vsp.app.classifiers.work_experience.investment_banking_group_classifier import (
|
| 12 |
InvestmentBankingGroup,
|
| 13 |
InvestmentBankingGroupClassification,
|
|
@@ -33,6 +41,10 @@ def sample_linkedin_profile():
|
|
| 33 |
title="Investment Banking Analyst",
|
| 34 |
company_name="Bank Corp",
|
| 35 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
Position(
|
| 37 |
title="Software Engineer",
|
| 38 |
company_name="Tech Corp",
|
|
@@ -42,53 +54,123 @@ def sample_linkedin_profile():
|
|
| 42 |
|
| 43 |
|
| 44 |
@pytest.mark.asyncio
|
| 45 |
-
async def
|
| 46 |
with (
|
| 47 |
patch("vsp.app.main.EducationClassifier") as mock_education_classifier,
|
| 48 |
patch("vsp.app.main.WorkExperienceClassifier") as mock_work_experience_classifier,
|
| 49 |
patch("vsp.app.main.InvestmentBankingGroupClassifier") as mock_investment_banking_group_classifier,
|
|
|
|
|
|
|
| 50 |
):
|
| 51 |
mock_education_classifier.return_value.classify_education = AsyncMock(
|
| 52 |
return_value=EducationClassification(output=SchoolType.MBA, confidence=1.0, reasoning="Test")
|
| 53 |
)
|
| 54 |
mock_work_experience_classifier.return_value.classify_work_experience = AsyncMock(
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
)
|
| 62 |
mock_investment_banking_group_classifier.return_value.classify_investment_banking_group = AsyncMock(
|
| 63 |
return_value=InvestmentBankingGroupClassification(
|
| 64 |
investment_banking_group=InvestmentBankingGroup.M_AND_A, confidence=1.0, reasoning="Test"
|
| 65 |
)
|
| 66 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
result = await process_linkedin_profile(sample_linkedin_profile)
|
| 69 |
|
| 70 |
assert isinstance(result, LinkedinProfileClassificationResults)
|
| 71 |
assert len(result.classified_educations) == 1
|
| 72 |
-
assert len(result.classified_work_experiences) ==
|
| 73 |
|
| 74 |
-
# Check
|
| 75 |
ib_experience = result.classified_work_experiences[0]
|
| 76 |
assert ib_experience.work_experience_classification.secondary_job_type == SecondaryJobType.INVESTMENT_BANKING
|
| 77 |
assert ib_experience.investment_banking_classification is not None
|
| 78 |
assert (
|
| 79 |
ib_experience.investment_banking_classification.investment_banking_group == InvestmentBankingGroup.M_AND_A
|
| 80 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
-
# Check
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
|
| 86 |
@pytest.mark.asyncio
|
| 87 |
-
async def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
with (
|
| 89 |
patch("vsp.app.main.EducationClassifier") as mock_education_classifier,
|
| 90 |
patch("vsp.app.main.WorkExperienceClassifier") as mock_work_experience_classifier,
|
| 91 |
patch("vsp.app.main.InvestmentBankingGroupClassifier") as mock_investment_banking_group_classifier,
|
|
|
|
|
|
|
| 92 |
):
|
| 93 |
mock_education_classifier.return_value.classify_education = AsyncMock(
|
| 94 |
return_value=EducationClassification(output=SchoolType.MBA, confidence=1.0, reasoning="Test")
|
|
@@ -101,21 +183,23 @@ async def test_process_linkedin_profile_non_investment_banking(sample_linkedin_p
|
|
| 101 |
reasoning="Test",
|
| 102 |
)
|
| 103 |
)
|
| 104 |
-
mock_investment_banking_group_classifier.return_value.classify_investment_banking_group = AsyncMock()
|
| 105 |
|
| 106 |
result = await process_linkedin_profile(sample_linkedin_profile)
|
| 107 |
|
| 108 |
assert isinstance(result, LinkedinProfileClassificationResults)
|
| 109 |
assert len(result.classified_educations) == 1
|
| 110 |
-
assert len(result.classified_work_experiences) ==
|
| 111 |
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
|
|
|
| 116 |
|
| 117 |
-
#
|
| 118 |
mock_investment_banking_group_classifier.return_value.classify_investment_banking_group.assert_not_called()
|
|
|
|
|
|
|
| 119 |
|
| 120 |
|
| 121 |
if __name__ == "__main__":
|
|
|
|
| 8 |
SecondaryJobType,
|
| 9 |
WorkExperienceClassification,
|
| 10 |
)
|
| 11 |
+
from vsp.app.classifiers.work_experience.investing_focus_asset_class_classifier import (
|
| 12 |
+
InvestingFocusAssetClass,
|
| 13 |
+
InvestingFocusAssetClassClassification,
|
| 14 |
+
)
|
| 15 |
+
from vsp.app.classifiers.work_experience.investing_focus_sector_classifier import (
|
| 16 |
+
InvestingFocusSector,
|
| 17 |
+
InvestingFocusSectorClassification,
|
| 18 |
+
)
|
| 19 |
from vsp.app.classifiers.work_experience.investment_banking_group_classifier import (
|
| 20 |
InvestmentBankingGroup,
|
| 21 |
InvestmentBankingGroupClassification,
|
|
|
|
| 41 |
title="Investment Banking Analyst",
|
| 42 |
company_name="Bank Corp",
|
| 43 |
),
|
| 44 |
+
Position(
|
| 45 |
+
title="Investment Associate",
|
| 46 |
+
company_name="VC Firm",
|
| 47 |
+
),
|
| 48 |
Position(
|
| 49 |
title="Software Engineer",
|
| 50 |
company_name="Tech Corp",
|
|
|
|
| 54 |
|
| 55 |
|
| 56 |
@pytest.mark.asyncio
async def test_process_linkedin_profile_comprehensive(sample_linkedin_profile):
    """
    Verify that each follow-up classifier runs only for positions of the matching job type.

    The general work experience classifier is mocked to label the profile's positions,
    in order, as investment banking, investing and engineering. The test then checks
    that the investment banking group classification is attached only to the first
    position, that both investing focus classifications are attached only to the second,
    and that the third receives no follow-up classification.
    """
    # One mocked general classification per position, returned in call order.
    general_classifications = [
        WorkExperienceClassification(
            primary_job_type=PrimaryJobType.FULL_TIME,
            secondary_job_type=secondary_job_type,
            confidence=1.0,
            reasoning="Test",
        )
        for secondary_job_type in (
            SecondaryJobType.INVESTMENT_BANKING,
            SecondaryJobType.INVESTING,
            SecondaryJobType.ENGINEERING,
        )
    ]

    with (
        patch("vsp.app.main.EducationClassifier") as education_cls,
        patch("vsp.app.main.WorkExperienceClassifier") as work_experience_cls,
        patch("vsp.app.main.InvestmentBankingGroupClassifier") as ib_group_cls,
        patch("vsp.app.main.InvestingFocusAssetClassClassifier") as asset_class_cls,
        patch("vsp.app.main.InvestingFocusSectorClassifier") as sector_cls,
    ):
        # Replace every classifier method with an AsyncMock we keep a handle on,
        # so the call counts can be checked directly at the end.
        classify_education = AsyncMock(
            return_value=EducationClassification(output=SchoolType.MBA, confidence=1.0, reasoning="Test")
        )
        education_cls.return_value.classify_education = classify_education

        classify_work_experience = AsyncMock(side_effect=general_classifications)
        work_experience_cls.return_value.classify_work_experience = classify_work_experience

        classify_ib_group = AsyncMock(
            return_value=InvestmentBankingGroupClassification(
                investment_banking_group=InvestmentBankingGroup.M_AND_A, confidence=1.0, reasoning="Test"
            )
        )
        ib_group_cls.return_value.classify_investment_banking_group = classify_ib_group

        classify_asset_class = AsyncMock(
            return_value=InvestingFocusAssetClassClassification(
                investing_focus_asset_class=InvestingFocusAssetClass.EARLY_STAGE_VC,
                confidence=1.0,
                reasoning="Test",
            )
        )
        asset_class_cls.return_value.classify_investing_focus_asset_class = classify_asset_class

        classify_sector = AsyncMock(
            return_value=InvestingFocusSectorClassification(
                investing_focus_sector=InvestingFocusSector.TECHNOLOGY_SOFTWARE_TMT,
                confidence=1.0,
                reasoning="Test",
            )
        )
        sector_cls.return_value.classify_investing_focus_sector = classify_sector

        result = await process_linkedin_profile(sample_linkedin_profile)

        assert isinstance(result, LinkedinProfileClassificationResults)
        assert len(result.classified_educations) == 1
        assert len(result.classified_work_experiences) == 3

        ib_experience, investing_experience, eng_experience = result.classified_work_experiences

        # Investment banking position: only the IB group classification is present.
        assert ib_experience.work_experience_classification.secondary_job_type == SecondaryJobType.INVESTMENT_BANKING
        assert ib_experience.investment_banking_classification is not None
        assert (
            ib_experience.investment_banking_classification.investment_banking_group == InvestmentBankingGroup.M_AND_A
        )
        assert ib_experience.investing_focus_asset_class_classification is None
        assert ib_experience.investing_focus_sector_classification is None

        # Investing position: both investing focus classifications are present, no IB group.
        assert investing_experience.work_experience_classification.secondary_job_type == SecondaryJobType.INVESTING
        assert investing_experience.investment_banking_classification is None
        assert investing_experience.investing_focus_asset_class_classification is not None
        assert (
            investing_experience.investing_focus_asset_class_classification.investing_focus_asset_class
            == InvestingFocusAssetClass.EARLY_STAGE_VC
        )
        assert investing_experience.investing_focus_sector_classification is not None
        assert (
            investing_experience.investing_focus_sector_classification.investing_focus_sector
            == InvestingFocusSector.TECHNOLOGY_SOFTWARE_TMT
        )

        # Engineering position: no follow-up classification of any kind.
        assert eng_experience.work_experience_classification.secondary_job_type == SecondaryJobType.ENGINEERING
        assert eng_experience.investment_banking_classification is None
        assert eng_experience.investing_focus_asset_class_classification is None
        assert eng_experience.investing_focus_sector_classification is None

        # Each follow-up classifier must have run exactly once, for its own position.
        assert classify_education.call_count == 1
        assert classify_work_experience.call_count == 3
        assert classify_ib_group.call_count == 1
        assert classify_asset_class.call_count == 1
        assert classify_sector.call_count == 1
|
| 157 |
|
| 158 |
|
| 159 |
@pytest.mark.asyncio
|
| 160 |
+
async def test_process_linkedin_profile_no_investing(sample_linkedin_profile):
|
| 161 |
+
sample_linkedin_profile.positions = [
|
| 162 |
+
Position(
|
| 163 |
+
title="Software Engineer",
|
| 164 |
+
company_name="Tech Corp",
|
| 165 |
+
)
|
| 166 |
+
]
|
| 167 |
+
|
| 168 |
with (
|
| 169 |
patch("vsp.app.main.EducationClassifier") as mock_education_classifier,
|
| 170 |
patch("vsp.app.main.WorkExperienceClassifier") as mock_work_experience_classifier,
|
| 171 |
patch("vsp.app.main.InvestmentBankingGroupClassifier") as mock_investment_banking_group_classifier,
|
| 172 |
+
patch("vsp.app.main.InvestingFocusAssetClassClassifier") as mock_investing_focus_asset_class_classifier,
|
| 173 |
+
patch("vsp.app.main.InvestingFocusSectorClassifier") as mock_investing_focus_sector_classifier,
|
| 174 |
):
|
| 175 |
mock_education_classifier.return_value.classify_education = AsyncMock(
|
| 176 |
return_value=EducationClassification(output=SchoolType.MBA, confidence=1.0, reasoning="Test")
|
|
|
|
| 183 |
reasoning="Test",
|
| 184 |
)
|
| 185 |
)
|
|
|
|
| 186 |
|
| 187 |
result = await process_linkedin_profile(sample_linkedin_profile)
|
| 188 |
|
| 189 |
assert isinstance(result, LinkedinProfileClassificationResults)
|
| 190 |
assert len(result.classified_educations) == 1
|
| 191 |
+
assert len(result.classified_work_experiences) == 1
|
| 192 |
|
| 193 |
+
work_experience = result.classified_work_experiences[0]
|
| 194 |
+
assert work_experience.work_experience_classification.secondary_job_type == SecondaryJobType.ENGINEERING
|
| 195 |
+
assert work_experience.investment_banking_classification is None
|
| 196 |
+
assert work_experience.investing_focus_asset_class_classification is None
|
| 197 |
+
assert work_experience.investing_focus_sector_classification is None
|
| 198 |
|
| 199 |
+
# Ensure the investment banking, investing focus asset class, and investing focus sector classifiers were not called
|
| 200 |
mock_investment_banking_group_classifier.return_value.classify_investment_banking_group.assert_not_called()
|
| 201 |
+
mock_investing_focus_asset_class_classifier.return_value.classify_investing_focus_asset_class.assert_not_called()
|
| 202 |
+
mock_investing_focus_sector_classifier.return_value.classify_investing_focus_sector.assert_not_called()
|
| 203 |
|
| 204 |
|
| 205 |
if __name__ == "__main__":
|