fix after merge

- src/vsp/app/1st_gradio.py +36 -26
- src/vsp/app/bindings.py +0 -2
- src/vsp/app/classifiers/work_experience/general_work_experience_classifier.py +0 -1
- src/vsp/app/classifiers/work_experience/investing_focus_asset_class_classifier.py +1 -3
- src/vsp/app/classifiers/work_experience/investment_banking_group_classifier.py +0 -1
- src/vsp/app/main.py +96 -3
- src/vsp/app/model/linkedin/linkedin_models.py +1 -0
- src/vsp/app/scrapers/linkedin_downloader.py +10 -8
src/vsp/app/1st_gradio.py

@@ -4,9 +4,11 @@ import nest_asyncio
 # Apply nest_asyncio to allow nested event loops
 nest_asyncio.apply()

+from vsp.app.main import VspDataEnrichment
+
 # Import your custom modules
 from vsp.app.scrapers.linkedin_downloader import LinkedinDownloader
+

 async def process_profile(profile_linkedin):
     downloader = LinkedinDownloader()
@@ -15,23 +17,26 @@ async def process_profile(profile_linkedin)
     enriched_profile = await enricher.process_linkedin_profile(profile=profile)
     return enriched_profile

+
 async def analyze_profile(profile_linkedin):
     enriched_profile = await process_profile(profile_linkedin)
     # Generate output from enriched_profile
     education_outputs = []
     work_experience_outputs = []
+
     # Process classified educations
     for idx, edu in enumerate(enriched_profile.classified_educations, 1):
         school = edu.education.school_name
         degree = edu.education.degree
         year = edu.education.end.year if edu.education.end else "N/A"
         classification = edu.classification.output.value
+        education_outputs.append(
+            f"### Education {idx}\n"
+            f"**School:** {school}\n\n"
+            f"**Degree:** {degree}\n\n"
+            f"**Year:** {year}\n\n"
+            f"**Classification:** {classification}\n"
+        )

     # Add total years of full-time work experience
     total_experience_years = enriched_profile.full_time_work_experience_years
@@ -43,58 +48,62 @@ async def analyze_profile(profile_linkedin)
     experience_output += "### Work Experience by Secondary Job Type:\n"
     for secondary_job_type, years in experience_by_secondary.items():
         experience_output += f"- {secondary_job_type.value}: {years} years\n"
+
     # Process classified work experiences
     for idx, exp in enumerate(enriched_profile.classified_work_experiences, 1):
         company = exp.position.company_name
         start_year = exp.position.start.year if exp.position.start else "N/A"
+        end_year = exp.position.end.year if (exp.position.end and exp.position.end.year) else "Present"
         time_range = f"{start_year} - {end_year}"
         title = exp.position.title
         primary_job_type = exp.work_experience_classification.primary_job_type.value
         secondary_job_type = exp.work_experience_classification.secondary_job_type.value

+        work_exp_str = (
+            f"### Work Experience {idx}\n"
+            f"**Company:** {company}\n\n"
+            f"**Time Range:** {time_range}\n\n"
+            f"**Title:** {title}\n\n"
+            f"**Primary Job Type:** {primary_job_type}\n\n"
+            f"**Secondary Job Type:** {secondary_job_type}\n\n"
+        )
+
         # Investing focus
         if exp.investing_focus_asset_class_classification:
             asset_class = exp.investing_focus_asset_class_classification.investing_focus_asset_class.value
             sector = (
                 exp.investing_focus_sector_classification.investing_focus_sector.value
+                if exp.investing_focus_sector_classification
+                else "N/A"
             )
             work_exp_str += f"**Investing Focus (Asset Class):** {asset_class}\n\n"
             work_exp_str += f"**Investing Focus (Sector):** {sector}\n\n"
+
         # Investment banking classification
         if exp.investment_banking_classification:
             ib_group = exp.investment_banking_classification.investment_banking_group.value
             work_exp_str += f"**Investment Banking Group:** {ib_group}\n"
+
         work_experience_outputs.append(work_exp_str)
+
     # Combine outputs
+    education_output = "\n\n".join(education_outputs)
+    work_experience_output = "\n\n".join(work_experience_outputs)
+
     full_output = f"# Classified Educations\n\n{education_output}\n\n# Classified Work Experiences\n\n{experience_output}\n\n{work_experience_output}"
     return full_output

+
 def main():
     # Define Gradio interface
     with gr.Blocks() as demo:
         gr.Markdown("# LinkedIn Profile Analyzer")
         gr.Markdown("Enter a LinkedIn profile URL to analyze educational and work experiences.")
+
         profile_linkedin = gr.Textbox(label="LinkedIn Profile URL")
         analyze_button = gr.Button("Analyze")
         output = gr.Markdown()
+
         async def on_analyze_click(profile_linkedin):
             if not profile_linkedin:
                 return "Please enter a valid LinkedIn Profile URL."
@@ -103,9 +112,10 @@ def main():
                 return result
             except Exception as e:
                 return f"An error occurred: {str(e)}"
+
         analyze_button.click(fn=on_analyze_click, inputs=profile_linkedin, outputs=output)
     demo.launch()

+
 if __name__ == "__main__":
     main()
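For context on the Gradio wiring above: the app registers an async callback on the "Analyze" button and calls nest_asyncio.apply() so that awaiting the enrichment pipeline works even when another event loop is already running. Below is a minimal, self-contained sketch of that pattern; it is not the project's code, and `slow_echo` is a hypothetical stand-in for the real enrichment call.

```python
# Minimal sketch: wiring an async handler to a Gradio button with nest_asyncio.
import asyncio

import gradio as gr
import nest_asyncio

# Allow awaiting coroutines even when a host event loop is already running
# (notebooks, embedded servers, etc.).
nest_asyncio.apply()


async def slow_echo(text: str) -> str:
    # Stand-in for the real enrichment pipeline (hypothetical work).
    await asyncio.sleep(0.1)
    return f"You entered: {text}"


def build_demo() -> gr.Blocks:
    with gr.Blocks() as demo:
        inp = gr.Textbox(label="Input")
        btn = gr.Button("Run")
        out = gr.Markdown()
        # Gradio accepts async callables directly as event handlers.
        btn.click(fn=slow_echo, inputs=inp, outputs=out)
    return demo


if __name__ == "__main__":
    build_demo().launch()
```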
src/vsp/app/bindings.py

@@ -5,8 +5,6 @@ from vsp.llm.llm_cache import LLMCache
 from vsp.llm.openai.openai import AsyncOpenAIService
 from vsp.llm.openai.openai_model import OpenAIModel

-
-
 prompt_loader = PromptLoader()
 llm_cache = LLMCache()
 open_ai_service = CachedLLMService(llm_service=AsyncOpenAIService(OpenAIModel.GPT_4_O), cache=llm_cache)
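The bindings change is whitespace only, but the lines shown do illustrate the module's design: a single OpenAI-backed service is wrapped in a cache so repeated prompts are not re-sent. The sketch below shows that wrapping pattern in generic form; all names here are hypothetical, and the real CachedLLMService / LLMCache interfaces may differ.

```python
# Illustrative sketch of caching an async LLM call, keyed by prompt text.
from typing import Awaitable, Callable


class InMemoryCache:
    """Trivial string cache; the real LLMCache may persist to disk."""

    def __init__(self) -> None:
        self._store: dict[str, str] = {}

    def get(self, key: str) -> str | None:
        return self._store.get(key)

    def set(self, key: str, value: str) -> None:
        self._store[key] = value


class CachedCompletion:
    """Wraps an async completion function; identical prompts hit the cache."""

    def __init__(self, complete: Callable[[str], Awaitable[str]], cache: InMemoryCache) -> None:
        self._complete = complete
        self._cache = cache

    async def __call__(self, prompt: str) -> str:
        cached = self._cache.get(prompt)
        if cached is not None:
            return cached
        result = await self._complete(prompt)
        self._cache.set(prompt, result)
        return result
```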
src/vsp/app/classifiers/work_experience/general_work_experience_classifier.py

@@ -134,7 +134,6 @@ class WorkExperienceClassifier:
         except KeyError as e:
             raise ValueError(f"Unknown job type: {str(e)}")

-
         return WorkExperienceClassification(
             primary_job_type=PrimaryJobType(primary_job_type),
             secondary_job_type=SecondaryJobType(secondary_job_type),
src/vsp/app/classifiers/work_experience/investing_focus_asset_class_classifier.py

@@ -124,8 +124,6 @@ class InvestingFocusAssetClassClassifier:
         except KeyError as e:
             raise ValueError(f"Unknown investing focus or asset class: {str(e)}")

-
-
         classification = InvestingFocusAssetClassClassification(
             investing_focus_asset_class=investing_focus_asset_class,
             reasoning=parsed["reasoning"],
@@ -167,5 +165,5 @@ class InvestingFocusAssetClassClassifier:
             resume=format_profile_as_resume(linkedin_profile),
             work_experience=format_position(work_experience),
         )
+        result = await prompt.evaluate()  # type: ignore
         return result
src/vsp/app/classifiers/work_experience/investment_banking_group_classifier.py

@@ -128,7 +128,6 @@ class InvestmentBankingGroupClassifier:
         except KeyError as e:
             raise ValueError(f"Unknown investment banking group: {str(e)}")

-
         return InvestmentBankingGroupClassification(
             investment_banking_group=investment_banking_group,
             reasoning=parsed["reasoning"],
src/vsp/app/main.py

@@ -17,12 +17,17 @@ Usage:
 """

 import asyncio
+import calendar
+from collections import defaultdict
+from datetime import date
+from typing import List, Mapping, Sequence

 from pydantic import BaseModel, Field

 from vsp.app.classifiers.education_classifier import EducationClassification, EducationClassifier
 from vsp.app.classifiers.work_experience.general_work_experience_classifier import (
+    PrimaryJobType,
+    SecondaryJobType,
     WorkExperienceClassification,
     WorkExperienceClassifier,
 )
@@ -92,6 +97,8 @@ class LinkedinProfileClassificationResults(BaseModel):

     classified_educations: Sequence[ClassifiedEducation] = Field(default_factory=list)
     classified_work_experiences: Sequence[ClassifiedWorkExperience] = Field(default_factory=list)
+    full_time_work_experience_years: float = Field(default=0.0)
+    full_time_work_experience_by_secondary: Mapping[SecondaryJobType, float] = Field(default_factory=dict)


 class VspDataEnrichment:
@@ -119,6 +126,83 @@ class VspDataEnrichment:
         self._investing_focus_asset_class_classifier = InvestingFocusAssetClassClassifier()
         self._investing_focus_sector_classifier = InvestingFocusSectorClassifier()

+    def estimate_full_time_experience_by_secondary_job_type(
+        self, classified_work_experiences: List[ClassifiedWorkExperience]
+    ) -> Mapping[SecondaryJobType, float]:
+        # Define current date
+        current_date = date(2024, 9, 18)
+
+        # List to store all events (start or end of intervals)
+        events = []
+
+        # Set to store all observed SecondaryJobTypes
+        observed_secondary_job_types = set()
+
+        for cwe in classified_work_experiences:
+            classification = cwe.work_experience_classification.primary_job_type
+            secondary_job_type = cwe.work_experience_classification.secondary_job_type
+
+            if classification == PrimaryJobType.FULL_TIME and secondary_job_type:
+                # Normalize start date
+                start = cwe.position.start
+                if not start or not start.year:
+                    continue
+                start_year = start.year
+                start_month = start.month if start.month else 1
+                start_day = start.day if start.day else 1
+                start_date = date(start_year, start_month, start_day)
+
+                # Normalize end date
+                end = cwe.position.end
+                if end is None:
+                    end_date = current_date
+                else:
+                    if not end.year:
+                        continue
+                    end_year = end.year
+                    end_month = end.month if end.month else 12
+                    if end.day:
+                        end_day = end.day
+                    else:
+                        # Get last day of the month
+                        _, end_day = calendar.monthrange(end_year, end_month)
+                    end_date = date(end_year, end_month, end_day)
+
+                if start_date > end_date:
+                    continue  # Skip invalid intervals
+
+                # Add events for sweep-line algorithm
+                events.append((start_date, "start", secondary_job_type))
+                events.append((end_date, "end", secondary_job_type))
+
+                observed_secondary_job_types.add(secondary_job_type)
+
+        # Sort events by date
+        events.sort(key=lambda x: x[0])
+
+        active_secondary_job_types = set()
+        last_date = None
+        durations = defaultdict(int)  # in days
+
+        for event_date, event_type, secondary_job_type in events:
+            if last_date is not None and event_date > last_date:
+                interval_duration = (event_date - last_date).days
+                # Distribute the interval_duration among active_secondary_job_types
+                for active_type in active_secondary_job_types:
+                    durations[active_type] += interval_duration
+
+            if event_type == "start":
+                active_secondary_job_types.add(secondary_job_type)
+            elif event_type == "end":
+                active_secondary_job_types.discard(secondary_job_type)
+
+            last_date = event_date
+
+        # Convert durations from days to years
+        durations_in_years = {stype: round(days / 365.25, 2) for stype, days in durations.items()}
+
+        return durations_in_years
+
     async def process_linkedin_profile(self, profile: LinkedinProfile) -> LinkedinProfileClassificationResults:
         """
         Process a LinkedIn profile and classify its education and work experiences.
@@ -171,7 +255,11 @@
             )
             classified_work_experience.investment_banking_classification = ib_classification

+            if (
+                work_classification.secondary_job_type == work_classification.secondary_job_type.INVESTING
+                and not work_classification.primary_job_type
+                == work_classification.primary_job_type.ADVISORY_BOARD_INVESTOR
+            ):
                 asset_class_task = (
                     self._investing_focus_asset_class_classifier.classify_investing_focus_asset_class(
                         profile, position
@@ -188,8 +276,13 @@

             classified_work_experiences.append(classified_work_experience)

+        experience_by_job_type = self.estimate_full_time_experience_by_secondary_job_type(classified_work_experiences)
+        total_work_experience = sum(experience_by_job_type.values())
         return LinkedinProfileClassificationResults(
-            classified_educations=classified_educations,
+            classified_educations=classified_educations,
+            classified_work_experiences=classified_work_experiences,
+            full_time_work_experience_years=total_work_experience,
+            full_time_work_experience_by_secondary=experience_by_job_type,
         )
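The bulk of this commit is the new estimate_full_time_experience_by_secondary_job_type method: it totals full-time tenure per secondary job type by sweeping over interval start/end events, so concurrent positions mapped to the same job type are not double-counted, and converts days to years with 365.25. Here is a self-contained sketch of the same sweep-line idea on plain (start, end, label) tuples; it is illustrative only, and it uses a per-label open-interval counter where the committed code keeps a set of active job types.

```python
# Sweep-line union of possibly overlapping intervals, grouped by label.
from collections import defaultdict
from datetime import date


def years_by_label(intervals: list[tuple[date, date, str]]) -> dict[str, float]:
    """Union overlapping [start, end] intervals per label and return years."""
    events: list[tuple[date, int, str]] = []
    for start, end, label in intervals:
        if start > end:
            continue  # skip invalid intervals
        events.append((start, +1, label))
        events.append((end, -1, label))
    events.sort(key=lambda e: e[0])

    active: dict[str, int] = defaultdict(int)  # open-interval count per label
    days: dict[str, int] = defaultdict(int)
    last: date | None = None
    for when, delta, label in events:
        if last is not None and when > last:
            span = (when - last).days
            # Every label with at least one open interval accrues this span once.
            for lbl, count in active.items():
                if count > 0:
                    days[lbl] += span
        active[label] += delta
        last = when
    return {lbl: round(d / 365.25, 2) for lbl, d in days.items()}


if __name__ == "__main__":
    sample = [
        (date(2018, 1, 1), date(2020, 1, 1), "INVESTING"),
        (date(2019, 6, 1), date(2021, 6, 1), "INVESTING"),   # overlaps the first
        (date(2021, 7, 1), date(2022, 7, 1), "CONSULTING"),
    ]
    print(years_by_label(sample))
    # INVESTING spans 2018-01-01..2021-06-01 exactly once (~3.41 years); CONSULTING ~1.0.
```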
src/vsp/app/model/linkedin/linkedin_models.py

@@ -91,6 +91,7 @@ class Position(StartEndMixin):
 class Skill(BaseSchema):
     name: str | None = None

+
 class Course(BaseSchema):
     name: str | None = None
     number: str | None = None
src/vsp/app/scrapers/linkedin_downloader.py

@@ -140,13 +140,11 @@ class LinkedinDownloader:
         headers = {"X-RapidAPI-Key": self._api_key, "X-RapidAPI-Host": self._X_RAPIDAPI_HOST}
         return (headers, querystring)

-
     def _compose_company_info_request(self, company_username: str) -> tuple[dict[str, str], dict[str, str]]:
         querystring = {"username": company_username}
         headers = {"X-RapidAPI-Key": self._api_key, "X-RapidAPI-Host": self._X_RAPIDAPI_HOST}
         return (headers, querystring)

-
     async def augment_company_descriptions(self, session: aiohttp.ClientSession, profile: LinkedinProfile):
         new_positions = []
         for position in profile.positions:
@@ -159,8 +157,8 @@ class LinkedinDownloader:
            async with session.get(self._URL + "get-company-details", headers=headers, params=querystring) as response:
                if response.status == 200:
                    data = await response.json()
+                    description = data["data"].get("description")
+                    tagline = data["data"].get("tagline")

                    result = []
                    if tagline:
@@ -168,11 +166,15 @@ class LinkedinDownloader:
                    if description:
                        result.append(description)

+                    company_description = "\n\n".join(result)
                    # Create a new Position instance with the updated company_description
                    position = position.model_copy(update={"company_description": company_description})
                else:
+                    logger.error(
+                        "Failed to fetch Linkedin company profile",
+                        company_username=company_username,
+                        status=response.status,
+                    )
            new_positions.append(position)
        # Update the positions list with the new positions
        profile.positions = new_positions
@@ -215,7 +217,6 @@ class LinkedinDownloader:
         return profile


-
 async def main():
     # Initialize the LinkedinDownloader
     downloader = LinkedinDownloader()
@@ -232,5 +233,6 @@ async def main():
     except Exception as e:
         print(f"An unexpected error occurred: {str(e)}")

+
 if __name__ == "__main__":
-    asyncio.run(main())
+    asyncio.run(main())
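The downloader change swaps direct indexing for .get() when reading the company payload (so missing fields no longer raise KeyError) and logs non-200 responses instead of dropping them silently. Below is a stripped-down sketch of that fetch-and-extract pattern with aiohttp; the endpoint, headers, and field names are placeholders, not the real RapidAPI contract, so running it as-is will simply fail the request.

```python
# Sketch: defensive JSON extraction around an aiohttp GET, assuming a
# placeholder endpoint and payload shape similar to the one shown above.
import asyncio

import aiohttp


async def fetch_company_blurb(session: aiohttp.ClientSession, username: str) -> str:
    headers = {"X-RapidAPI-Key": "<key>", "X-RapidAPI-Host": "<host>"}  # placeholders
    params = {"username": username}
    async with session.get(
        "https://example.invalid/get-company-details", headers=headers, params=params
    ) as response:
        if response.status != 200:
            # The real code reports this via logger.error(...); keep it simple here.
            return ""
        data = await response.json()
        payload = data.get("data", {})
        # .get() tolerates missing fields instead of raising KeyError.
        parts = [payload.get("tagline"), payload.get("description")]
        return "\n\n".join(p for p in parts if p)


async def main() -> None:
    async with aiohttp.ClientSession() as session:
        print(await fetch_company_blurb(session, "example-company"))


if __name__ == "__main__":
    asyncio.run(main())
```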
|