fix after merge

- src/vsp/app/1st_gradio.py +36 -26
- src/vsp/app/bindings.py +0 -2
- src/vsp/app/classifiers/work_experience/general_work_experience_classifier.py +0 -1
- src/vsp/app/classifiers/work_experience/investing_focus_asset_class_classifier.py +1 -3
- src/vsp/app/classifiers/work_experience/investment_banking_group_classifier.py +0 -1
- src/vsp/app/main.py +96 -3
- src/vsp/app/model/linkedin/linkedin_models.py +1 -0
- src/vsp/app/scrapers/linkedin_downloader.py +10 -8
src/vsp/app/1st_gradio.py

@@ -4,9 +4,11 @@ import nest_asyncio
 # Apply nest_asyncio to allow nested event loops
 nest_asyncio.apply()

+from vsp.app.main import VspDataEnrichment
+
 # Import your custom modules
 from vsp.app.scrapers.linkedin_downloader import LinkedinDownloader
+

 async def process_profile(profile_linkedin):
     downloader = LinkedinDownloader()
@@ -15,23 +17,26 @@ async def process_profile(profile_linkedin)
     enriched_profile = await enricher.process_linkedin_profile(profile=profile)
     return enriched_profile

+
 async def analyze_profile(profile_linkedin):
     enriched_profile = await process_profile(profile_linkedin)
     # Generate output from enriched_profile
     education_outputs = []
     work_experience_outputs = []
+
     # Process classified educations
     for idx, edu in enumerate(enriched_profile.classified_educations, 1):
         school = edu.education.school_name
         degree = edu.education.degree
         year = edu.education.end.year if edu.education.end else "N/A"
         classification = edu.classification.output.value
+        education_outputs.append(
+            f"### Education {idx}\n"
+            f"**School:** {school}\n\n"
+            f"**Degree:** {degree}\n\n"
+            f"**Year:** {year}\n\n"
+            f"**Classification:** {classification}\n"
+        )

     # Add total years of full-time work experience
     total_experience_years = enriched_profile.full_time_work_experience_years
@@ -43,58 +48,62 @@ async def analyze_profile(profile_linkedin)
     experience_output += "### Work Experience by Secondary Job Type:\n"
     for secondary_job_type, years in experience_by_secondary.items():
         experience_output += f"- {secondary_job_type.value}: {years} years\n"
+
     # Process classified work experiences
     for idx, exp in enumerate(enriched_profile.classified_work_experiences, 1):
         company = exp.position.company_name
         start_year = exp.position.start.year if exp.position.start else "N/A"
+        end_year = exp.position.end.year if (exp.position.end and exp.position.end.year) else "Present"
         time_range = f"{start_year} - {end_year}"
         title = exp.position.title
         primary_job_type = exp.work_experience_classification.primary_job_type.value
         secondary_job_type = exp.work_experience_classification.secondary_job_type.value

+        work_exp_str = (
+            f"### Work Experience {idx}\n"
+            f"**Company:** {company}\n\n"
+            f"**Time Range:** {time_range}\n\n"
+            f"**Title:** {title}\n\n"
+            f"**Primary Job Type:** {primary_job_type}\n\n"
+            f"**Secondary Job Type:** {secondary_job_type}\n\n"
+        )
+
         # Investing focus
         if exp.investing_focus_asset_class_classification:
             asset_class = exp.investing_focus_asset_class_classification.investing_focus_asset_class.value
             sector = (
                 exp.investing_focus_sector_classification.investing_focus_sector.value
+                if exp.investing_focus_sector_classification
+                else "N/A"
             )
             work_exp_str += f"**Investing Focus (Asset Class):** {asset_class}\n\n"
             work_exp_str += f"**Investing Focus (Sector):** {sector}\n\n"
+
         # Investment banking classification
         if exp.investment_banking_classification:
             ib_group = exp.investment_banking_classification.investment_banking_group.value
             work_exp_str += f"**Investment Banking Group:** {ib_group}\n"
+
         work_experience_outputs.append(work_exp_str)
+
     # Combine outputs
+    education_output = "\n\n".join(education_outputs)
+    work_experience_output = "\n\n".join(work_experience_outputs)
+
     full_output = f"# Classified Educations\n\n{education_output}\n\n# Classified Work Experiences\n\n{experience_output}\n\n{work_experience_output}"
     return full_output

+
 def main():
     # Define Gradio interface
     with gr.Blocks() as demo:
         gr.Markdown("# LinkedIn Profile Analyzer")
         gr.Markdown("Enter a LinkedIn profile URL to analyze educational and work experiences.")
+
         profile_linkedin = gr.Textbox(label="LinkedIn Profile URL")
         analyze_button = gr.Button("Analyze")
         output = gr.Markdown()
+
         async def on_analyze_click(profile_linkedin):
             if not profile_linkedin:
                 return "Please enter a valid LinkedIn Profile URL."
@@ -103,9 +112,10 @@ def main():
                 return result
             except Exception as e:
                 return f"An error occurred: {str(e)}"
+
         analyze_button.click(fn=on_analyze_click, inputs=profile_linkedin, outputs=output)
     demo.launch()

+
 if __name__ == "__main__":
     main()
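For context on the Gradio wiring above: the app registers an async callback on the "Analyze" button and calls nest_asyncio.apply() so that awaiting the enrichment pipeline works even when another event loop is already running. Below is a minimal, self-contained sketch of that pattern; it is not the project's code, and `slow_echo` is a hypothetical stand-in for the real enrichment call.

```python
# Minimal sketch: wiring an async handler to a Gradio button with nest_asyncio.
import asyncio

import gradio as gr
import nest_asyncio

# Allow awaiting coroutines even when a host event loop is already running
# (notebooks, embedded servers, etc.).
nest_asyncio.apply()


async def slow_echo(text: str) -> str:
    # Stand-in for the real enrichment pipeline (hypothetical work).
    await asyncio.sleep(0.1)
    return f"You entered: {text}"


def build_demo() -> gr.Blocks:
    with gr.Blocks() as demo:
        inp = gr.Textbox(label="Input")
        btn = gr.Button("Run")
        out = gr.Markdown()
        # Gradio accepts async callables directly as event handlers.
        btn.click(fn=slow_echo, inputs=inp, outputs=out)
    return demo


if __name__ == "__main__":
    build_demo().launch()
```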
src/vsp/app/bindings.py

@@ -5,8 +5,6 @@ from vsp.llm.llm_cache import LLMCache
 from vsp.llm.openai.openai import AsyncOpenAIService
 from vsp.llm.openai.openai_model import OpenAIModel

-
-
 prompt_loader = PromptLoader()
 llm_cache = LLMCache()
 open_ai_service = CachedLLMService(llm_service=AsyncOpenAIService(OpenAIModel.GPT_4_O), cache=llm_cache)
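The bindings change is whitespace only, but the lines shown do illustrate the module's design: a single OpenAI-backed service is wrapped in a cache so repeated prompts are not re-sent. The sketch below shows that wrapping pattern in generic form; all names here are hypothetical, and the real CachedLLMService / LLMCache interfaces may differ.

```python
# Illustrative sketch of caching an async LLM call, keyed by prompt text.
from typing import Awaitable, Callable


class InMemoryCache:
    """Trivial string cache; the real LLMCache may persist to disk."""

    def __init__(self) -> None:
        self._store: dict[str, str] = {}

    def get(self, key: str) -> str | None:
        return self._store.get(key)

    def set(self, key: str, value: str) -> None:
        self._store[key] = value


class CachedCompletion:
    """Wraps an async completion function; identical prompts hit the cache."""

    def __init__(self, complete: Callable[[str], Awaitable[str]], cache: InMemoryCache) -> None:
        self._complete = complete
        self._cache = cache

    async def __call__(self, prompt: str) -> str:
        cached = self._cache.get(prompt)
        if cached is not None:
            return cached
        result = await self._complete(prompt)
        self._cache.set(prompt, result)
        return result
```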
src/vsp/app/classifiers/work_experience/general_work_experience_classifier.py

@@ -134,7 +134,6 @@ class WorkExperienceClassifier:
         except KeyError as e:
             raise ValueError(f"Unknown job type: {str(e)}")

-
         return WorkExperienceClassification(
             primary_job_type=PrimaryJobType(primary_job_type),
             secondary_job_type=SecondaryJobType(secondary_job_type),
src/vsp/app/classifiers/work_experience/investing_focus_asset_class_classifier.py

@@ -124,8 +124,6 @@ class InvestingFocusAssetClassClassifier:
         except KeyError as e:
             raise ValueError(f"Unknown investing focus or asset class: {str(e)}")

-
-
         classification = InvestingFocusAssetClassClassification(
             investing_focus_asset_class=investing_focus_asset_class,
             reasoning=parsed["reasoning"],
@@ -167,5 +165,5 @@ class InvestingFocusAssetClassClassifier:
             resume=format_profile_as_resume(linkedin_profile),
             work_experience=format_position(work_experience),
         )
+        result = await prompt.evaluate()  # type: ignore
         return result
src/vsp/app/classifiers/work_experience/investment_banking_group_classifier.py

@@ -128,7 +128,6 @@ class InvestmentBankingGroupClassifier:
         except KeyError as e:
             raise ValueError(f"Unknown investment banking group: {str(e)}")

-
         return InvestmentBankingGroupClassification(
             investment_banking_group=investment_banking_group,
             reasoning=parsed["reasoning"],
src/vsp/app/main.py

@@ -17,12 +17,17 @@ Usage:
 """

 import asyncio
+import calendar
+from collections import defaultdict
+from datetime import date
+from typing import List, Mapping, Sequence

 from pydantic import BaseModel, Field

 from vsp.app.classifiers.education_classifier import EducationClassification, EducationClassifier
 from vsp.app.classifiers.work_experience.general_work_experience_classifier import (
+    PrimaryJobType,
+    SecondaryJobType,
     WorkExperienceClassification,
     WorkExperienceClassifier,
 )
@@ -92,6 +97,8 @@ class LinkedinProfileClassificationResults(BaseModel):

     classified_educations: Sequence[ClassifiedEducation] = Field(default_factory=list)
     classified_work_experiences: Sequence[ClassifiedWorkExperience] = Field(default_factory=list)
+    full_time_work_experience_years: float = Field(default=0.0)
+    full_time_work_experience_by_secondary: Mapping[SecondaryJobType, float] = Field(default_factory=dict)


 class VspDataEnrichment:
@@ -119,6 +126,83 @@ class VspDataEnrichment:
         self._investing_focus_asset_class_classifier = InvestingFocusAssetClassClassifier()
         self._investing_focus_sector_classifier = InvestingFocusSectorClassifier()

+    def estimate_full_time_experience_by_secondary_job_type(
+        self, classified_work_experiences: List[ClassifiedWorkExperience]
+    ) -> Mapping[SecondaryJobType, float]:
+        # Define current date
+        current_date = date(2024, 9, 18)
+
+        # List to store all events (start or end of intervals)
+        events = []
+
+        # Set to store all observed SecondaryJobTypes
+        observed_secondary_job_types = set()
+
+        for cwe in classified_work_experiences:
+            classification = cwe.work_experience_classification.primary_job_type
+            secondary_job_type = cwe.work_experience_classification.secondary_job_type
+
+            if classification == PrimaryJobType.FULL_TIME and secondary_job_type:
+                # Normalize start date
+                start = cwe.position.start
+                if not start or not start.year:
+                    continue
+                start_year = start.year
+                start_month = start.month if start.month else 1
+                start_day = start.day if start.day else 1
+                start_date = date(start_year, start_month, start_day)
+
+                # Normalize end date
+                end = cwe.position.end
+                if end is None:
+                    end_date = current_date
+                else:
+                    if not end.year:
+                        continue
+                    end_year = end.year
+                    end_month = end.month if end.month else 12
+                    if end.day:
+                        end_day = end.day
+                    else:
+                        # Get last day of the month
+                        _, end_day = calendar.monthrange(end_year, end_month)
+                    end_date = date(end_year, end_month, end_day)
+
+                if start_date > end_date:
+                    continue  # Skip invalid intervals
+
+                # Add events for sweep-line algorithm
+                events.append((start_date, "start", secondary_job_type))
+                events.append((end_date, "end", secondary_job_type))
+
+                observed_secondary_job_types.add(secondary_job_type)
+
+        # Sort events by date
+        events.sort(key=lambda x: x[0])
+
+        active_secondary_job_types = set()
+        last_date = None
+        durations = defaultdict(int)  # in days
+
+        for event_date, event_type, secondary_job_type in events:
+            if last_date is not None and event_date > last_date:
+                interval_duration = (event_date - last_date).days
+                # Distribute the interval_duration among active_secondary_job_types
+                for active_type in active_secondary_job_types:
+                    durations[active_type] += interval_duration
+
+            if event_type == "start":
+                active_secondary_job_types.add(secondary_job_type)
+            elif event_type == "end":
+                active_secondary_job_types.discard(secondary_job_type)
+
+            last_date = event_date
+
+        # Convert durations from days to years
+        durations_in_years = {stype: round(days / 365.25, 2) for stype, days in durations.items()}
+
+        return durations_in_years
+
     async def process_linkedin_profile(self, profile: LinkedinProfile) -> LinkedinProfileClassificationResults:
         """
         Process a LinkedIn profile and classify its education and work experiences.
@@ -171,7 +255,11 @@
             )
             classified_work_experience.investment_banking_classification = ib_classification

+            if (
+                work_classification.secondary_job_type == work_classification.secondary_job_type.INVESTING
+                and not work_classification.primary_job_type
+                == work_classification.primary_job_type.ADVISORY_BOARD_INVESTOR
+            ):
                 asset_class_task = (
                     self._investing_focus_asset_class_classifier.classify_investing_focus_asset_class(
                         profile, position
@@ -188,8 +276,13 @@

             classified_work_experiences.append(classified_work_experience)

+        experience_by_job_type = self.estimate_full_time_experience_by_secondary_job_type(classified_work_experiences)
+        total_work_experience = sum(experience_by_job_type.values())
         return LinkedinProfileClassificationResults(
-            classified_educations=classified_educations,
+            classified_educations=classified_educations,
+            classified_work_experiences=classified_work_experiences,
+            full_time_work_experience_years=total_work_experience,
+            full_time_work_experience_by_secondary=experience_by_job_type,
         )
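The bulk of this commit is the new estimate_full_time_experience_by_secondary_job_type method: it totals full-time tenure per secondary job type by sweeping over interval start/end events, so concurrent positions mapped to the same job type are not double-counted, and converts days to years with 365.25. Here is a self-contained sketch of the same sweep-line idea on plain (start, end, label) tuples; it is illustrative only, and it uses a per-label open-interval counter where the committed code keeps a set of active job types.

```python
# Sweep-line union of possibly overlapping intervals, grouped by label.
from collections import defaultdict
from datetime import date


def years_by_label(intervals: list[tuple[date, date, str]]) -> dict[str, float]:
    """Union overlapping [start, end] intervals per label and return years."""
    events: list[tuple[date, int, str]] = []
    for start, end, label in intervals:
        if start > end:
            continue  # skip invalid intervals
        events.append((start, +1, label))
        events.append((end, -1, label))
    events.sort(key=lambda e: e[0])

    active: dict[str, int] = defaultdict(int)  # open-interval count per label
    days: dict[str, int] = defaultdict(int)
    last: date | None = None
    for when, delta, label in events:
        if last is not None and when > last:
            span = (when - last).days
            # Every label with at least one open interval accrues this span once.
            for lbl, count in active.items():
                if count > 0:
                    days[lbl] += span
        active[label] += delta
        last = when
    return {lbl: round(d / 365.25, 2) for lbl, d in days.items()}


if __name__ == "__main__":
    sample = [
        (date(2018, 1, 1), date(2020, 1, 1), "INVESTING"),
        (date(2019, 6, 1), date(2021, 6, 1), "INVESTING"),   # overlaps the first
        (date(2021, 7, 1), date(2022, 7, 1), "CONSULTING"),
    ]
    print(years_by_label(sample))
    # INVESTING spans 2018-01-01..2021-06-01 exactly once (~3.41 years); CONSULTING ~1.0.
```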
src/vsp/app/model/linkedin/linkedin_models.py

@@ -91,6 +91,7 @@ class Position(StartEndMixin):
 class Skill(BaseSchema):
     name: str | None = None

+
 class Course(BaseSchema):
     name: str | None = None
     number: str | None = None
src/vsp/app/scrapers/linkedin_downloader.py

@@ -140,13 +140,11 @@ class LinkedinDownloader:
         headers = {"X-RapidAPI-Key": self._api_key, "X-RapidAPI-Host": self._X_RAPIDAPI_HOST}
         return (headers, querystring)

-
     def _compose_company_info_request(self, company_username: str) -> tuple[dict[str, str], dict[str, str]]:
         querystring = {"username": company_username}
         headers = {"X-RapidAPI-Key": self._api_key, "X-RapidAPI-Host": self._X_RAPIDAPI_HOST}
         return (headers, querystring)

-
     async def augment_company_descriptions(self, session: aiohttp.ClientSession, profile: LinkedinProfile):
         new_positions = []
         for position in profile.positions:
@@ -159,8 +157,8 @@ class LinkedinDownloader:
            async with session.get(self._URL + "get-company-details", headers=headers, params=querystring) as response:
                if response.status == 200:
                    data = await response.json()
+                    description = data["data"].get("description")
+                    tagline = data["data"].get("tagline")

                    result = []
                    if tagline:
@@ -168,11 +166,15 @@ class LinkedinDownloader:
                    if description:
                        result.append(description)

+                    company_description = "\n\n".join(result)
                    # Create a new Position instance with the updated company_description
                    position = position.model_copy(update={"company_description": company_description})
                else:
+                    logger.error(
+                        "Failed to fetch Linkedin company profile",
+                        company_username=company_username,
+                        status=response.status,
+                    )
            new_positions.append(position)
        # Update the positions list with the new positions
        profile.positions = new_positions
@@ -215,7 +217,6 @@ class LinkedinDownloader:
         return profile


-
 async def main():
     # Initialize the LinkedinDownloader
     downloader = LinkedinDownloader()
@@ -232,5 +233,6 @@ async def main():
     except Exception as e:
         print(f"An unexpected error occurred: {str(e)}")

+
 if __name__ == "__main__":
-    asyncio.run(main())
+    asyncio.run(main())
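The downloader change swaps direct indexing for .get() when reading the company payload (so missing fields no longer raise KeyError) and logs non-200 responses instead of dropping them silently. Below is a stripped-down sketch of that fetch-and-extract pattern with aiohttp; the endpoint, headers, and field names are placeholders, not the real RapidAPI contract, so running it as-is will simply fail the request.

```python
# Sketch: defensive JSON extraction around an aiohttp GET, assuming a
# placeholder endpoint and payload shape similar to the one shown above.
import asyncio

import aiohttp


async def fetch_company_blurb(session: aiohttp.ClientSession, username: str) -> str:
    headers = {"X-RapidAPI-Key": "<key>", "X-RapidAPI-Host": "<host>"}  # placeholders
    params = {"username": username}
    async with session.get(
        "https://example.invalid/get-company-details", headers=headers, params=params
    ) as response:
        if response.status != 200:
            # The real code reports this via logger.error(...); keep it simple here.
            return ""
        data = await response.json()
        payload = data.get("data", {})
        # .get() tolerates missing fields instead of raising KeyError.
        parts = [payload.get("tagline"), payload.get("description")]
        return "\n\n".join(p for p in parts if p)


async def main() -> None:
    async with aiohttp.ClientSession() as session:
        print(await fetch_company_blurb(session, "example-company"))


if __name__ == "__main__":
    asyncio.run(main())
```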
|