# main.py
"""FastAPI application exposing embedding endpoints.

Two POST routes are defined: ``/embed/jobposting`` and ``/embed/jobseeker``.
Each one maps its request payload onto the project's embedding input
objects, calls the shared ``EmbeddingManager`` and returns the resulting
vectors as plain JSON lists.

Importing this module constructs the storage, search and encoder objects
below, so the import itself has side effects.
"""
import logging
from datetime import datetime
from typing import Any, Dict, List, Optional

import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

from two_phase_search import TwoPhaseSearchSystem
from chroma_storage import ChromaMatchingSystem
from opensearch_client import OpenSearchClient
from embeddings import (
    JobPosting,
    Skill,
    EmbeddingManager,
    IndependentJobSeekerAssessmentRDS,
    JobseekerInfoRDS,
)
from encoder import create_encoders

logger = logging.getLogger(__name__)

app = FastAPI()

# Initialize systems
chroma_matcher = ChromaMatchingSystem(collection_name="job_seekers")
opensearch_client = OpenSearchClient()
BASE_URL = "https://dev-abhinav.ngrok.io"
search_system = TwoPhaseSearchSystem(chroma_matcher, opensearch_client, BASE_URL)

# Initialize encoders and embedding manager
job_encoder, seeker_encoder = create_encoders('all-mpnet-base-v2')
embedding_manager = EmbeddingManager(job_encoder, seeker_encoder)


# New schemas
class JobPostingEmbeddingRequest(BaseModel):
    """Request body for ``POST /embed/jobposting``."""

    title: str
    role_description: str
    company_description: str
    primary_skills: List[str]
    secondary_skills: List[str]


class JobPostingEmbeddingResponse(BaseModel):
    """Response body for ``POST /embed/jobposting``: one vector per field."""

    title_embedding: List[float]
    role_description_embedding: List[float]
    company_description_embedding: List[float]
    primary_skills_embedding: List[float]
    secondary_skills_embedding: List[float]


class Title(BaseModel):
    """Job title wrapper carrying only the title's name."""

    name: str


class ExperienceBaseWithoutCompanyCore(BaseModel):
    """Work-experience entry without any company information."""

    location_names: List[str] = []
    title: Title
    is_primary: bool
    experience_summaries: List[str] = []


class EmbeddingExperience(ExperienceBaseWithoutCompanyCore):
    """Work-experience entry extended with the company name."""

    company_name: str


class EducationCore(BaseModel):
    """Education entry; every field is optional."""

    institution: Optional[str] = None
    degree: Optional[str] = None
    field_of_study: Optional[str] = None


class CertificationCore(BaseModel):
    """Certification entry; only ``name`` is required."""

    organization: Optional[str] = None
    name: str
    # start_date and end_date not required, map to '' if not provided


class JobseekerEmbeddingRequest(BaseModel):
    """Request body for ``POST /embed/jobseeker``."""

    summary: str
    skills: List[str]
    educations: List[EducationCore]
    certifications: List[CertificationCore]
    experiences: List[EmbeddingExperience]


class JobseekerEmbeddingResponse(BaseModel):
    """Response body for ``POST /embed/jobseeker``: one vector per section."""

    summary_embedding: List[float]
    skills_embedding: List[float]
    educations_embedding: List[float]
    certifications_embedding: List[float]
    experiences_embedding: List[float]


@app.post("/embed/jobposting", response_model=JobPostingEmbeddingResponse)
async def create_jobposting_embedding(req: JobPostingEmbeddingRequest):
    """Embed the fields of a job posting.

    Args:
        req: Job posting text fields and skill names.

    Returns:
        A dict with one list-valued embedding per job posting field,
        shaped like ``JobPostingEmbeddingResponse``.

    Raises:
        HTTPException: With status 500 and the error text as ``detail``
            when building the posting or computing embeddings fails.
    """
    try:
        job = JobPosting(
            title=req.title,
            role_description=req.role_description,
            company_description=req.company_description,
            primary_skills=[Skill(skill_name=s) for s in req.primary_skills],
            secondary_skills=[Skill(skill_name=s) for s in req.secondary_skills],
        )
        # NOTE(review): this call is synchronous inside an async handler; if
        # encoding is CPU-bound it will hold the event loop while it runs.
        # Consider offloading to a worker thread after confirming the
        # encoders are safe to call concurrently.
        embeddings = embedding_manager.embed_jobposting(job)
        return {
            "title_embedding": embeddings['title'].tolist(),
            "role_description_embedding": embeddings['role_description'].tolist(),
            "company_description_embedding": embeddings['company_description'].tolist(),
            "primary_skills_embedding": embeddings['primary_skills'].tolist(),
            "secondary_skills_embedding": embeddings['secondary_skills'].tolist(),
        }
    except Exception as e:
        # Endpoint boundary: keep the traceback in the logs and chain the
        # cause, while returning the same 500 payload as before.
        logger.exception("Failed to create job posting embedding")
        raise HTTPException(status_code=500, detail=str(e)) from e


def _build_jobseeker_assessment(req: JobseekerEmbeddingRequest):
    """Map a job seeker request onto ``IndependentJobSeekerAssessmentRDS``."""
    experiences = [
        {
            "experience_summaries": exp.experience_summaries,
            "title": exp.title.name,
            "is_primary": exp.is_primary,
            "location_names": exp.location_names,
            # Map company_name to company dict
            "company": {"name": exp.company_name} if exp.company_name else None,
            "start_date": "",  # Not used directly, but can be empty
            "end_date": "",  # Not used directly, but can be empty
        }
        for exp in req.experiences
    ]
    # Missing optional education fields are sent as empty strings.
    educations = [
        {
            "degree": edu.degree or "",
            "field": edu.field_of_study or "",
            "institution": edu.institution or "",
        }
        for edu in req.educations
    ]
    # The request carries no certification dates, so both are sent empty.
    certifications = [
        {
            "name": cert.name,
            "organization": cert.organization or "",
            "start_date": "",
            "end_date": "",
        }
        for cert in req.certifications
    ]
    # Map all skills into primary_skills, leave secondary_skills empty
    return IndependentJobSeekerAssessmentRDS(
        primary_skills=req.skills,
        secondary_skills=[],
        experiences=experiences,
        educations=educations,
        certifications=certifications,
    )


@app.post("/embed/jobseeker", response_model=JobseekerEmbeddingResponse)
async def create_jobseeker_embedding(req: JobseekerEmbeddingRequest):
    """Embed the sections of a job seeker profile.

    All requested skills are passed as primary skills; the secondary skills
    embedding produced by the manager is not returned.

    Args:
        req: Summary, skills, educations, certifications and experiences.

    Returns:
        A dict with one list-valued embedding per profile section,
        shaped like ``JobseekerEmbeddingResponse``.

    Raises:
        HTTPException: With status 500 and the error text as ``detail``
            when mapping the payload or computing embeddings fails.
    """
    try:
        processed_obj = _build_jobseeker_assessment(req)
        unprocessed_obj = JobseekerInfoRDS(summary=req.summary)
        # NOTE(review): synchronous call inside an async handler; see the
        # matching note in create_jobposting_embedding.
        embeddings = embedding_manager.embed_jobseeker(processed_obj, unprocessed_obj)
        # Manager keys 'primary_skills', 'education' and 'experience' are
        # exposed as skills_embedding, educations_embedding and
        # experiences_embedding respectively.
        return {
            "summary_embedding": embeddings['summary'].tolist(),
            "skills_embedding": embeddings['primary_skills'].tolist(),
            "educations_embedding": embeddings['education'].tolist(),
            "certifications_embedding": embeddings['certifications'].tolist(),
            "experiences_embedding": embeddings['experience'].tolist(),
        }
    except Exception as e:
        # Endpoint boundary: keep the traceback in the logs and chain the
        # cause, while returning the same 500 payload as before.
        logger.exception("Failed to create job seeker embedding")
        raise HTTPException(status_code=500, detail=str(e)) from e


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8001)