# File size: 5,695 Bytes
# Revisions: 1cdaa9c d66784b f351996 d66784b f351996 1cdaa9c d66784b
# (file-viewer line-number gutter 1-138 omitted)
from typing import List, Dict, Optional
from opensearch_client import OpenSearchClient
from chroma_storage import ChromaMatchingSystem
from embeddings import JobPosting
class TwoPhaseSearchSystem:
    """Two-phase candidate search: OpenSearch filtering, then ChromaDB matching.

    Phase 1 asks the OpenSearch client for the jobseekers that satisfy a set
    of search parameters. Phase 2 asks the Chroma matcher to match the job
    posting against only the jobseekers returned by phase 1.
    """

    # Filter keys removed (in this order) while relaxing a query.
    _CERTIFICATION_KEYS = ("certifications_name", "certifications_organization")
    _LOCATION_KEYS = ("country_filter", "state_filter")
    _TAG_KEYS = ("tags",)

    def __init__(self, chroma_matcher: ChromaMatchingSystem, opensearch_client: OpenSearchClient):
        """Store the two collaborators; no I/O happens here."""
        self.chroma_matcher = chroma_matcher
        self.opensearch_client = opensearch_client

    def search_candidates(self,
                          job_posting: JobPosting,
                          search_params: Dict,
                          n_results: int = 10) -> List[Dict]:
        """Run the two-phase search and merge both result sets.

        1. OpenSearch boolean filtering with ``search_params``.
        2. ChromaDB embedding matching, restricted to the jobseeker ids that
           passed step 1.

        Args:
            job_posting: Posting forwarded to the Chroma matcher.
            search_params: Parameters forwarded to
                ``opensearch_client.search_jobseekers``.
            n_results: Number of matches requested from the Chroma matcher.

        Returns:
            One dict per match whose jobseeker was also an OpenSearch hit,
            in the order the matcher returned them, with the keys
            ``jobseeker_id``, ``similarity_score``, ``field_scores``,
            ``explanation`` and ``opensearch_score`` (the hit's ``_score``,
            or ``0.0`` when the hit carries none). Empty list when OpenSearch
            returns nothing.
        """
        # Phase 1: OpenSearch filtering
        opensearch_results = self.opensearch_client.search_jobseekers(search_params)
        if not opensearch_results:
            return []

        # Index hits by id once instead of scanning the hit list per match.
        # setdefault keeps the FIRST hit for an id, like a first-match scan.
        hits_by_id: Dict = {}
        for hit in opensearch_results:
            hits_by_id.setdefault(hit['jobseeker_id'], hit)

        # Phase 2: ChromaDB embedding matching, only for jobseekers that
        # passed the OpenSearch filter (ids de-duplicated by the index).
        matches = self.chroma_matcher.get_matches(
            job_posting=job_posting,
            n_results=n_results,
            where_conditions={"jobseeker_id": {"$in": list(hits_by_id)}},
        )

        # Combine OpenSearch scores with ChromaDB match results
        final_results = []
        for match in matches or []:
            hit = hits_by_id.get(match.jobseeker_id)
            if not hit:
                # Matcher returned an id that was not an OpenSearch hit.
                continue
            final_results.append({
                'jobseeker_id': match.jobseeker_id,
                'similarity_score': match.similarity_score,
                'field_scores': match.field_scores,
                'explanation': match.explanation,
                'opensearch_score': hit.get('_score', 0.0),
            })
        return final_results

    def build_search_params(self,
                            job_posting: JobPosting,
                            location: Optional[Dict] = None,
                            certifications: Optional[Dict] = None,
                            tags: Optional[List[Dict]] = None,
                            minimum_skills_match: int = 3,
                            minimum_results: int = 10) -> Dict:
        """Pick the strictest OpenSearch parameter set that yields enough hits.

        Each candidate parameter set is sent to
        ``opensearch_client.search_jobseekers`` in strict-to-relaxed order;
        the first one returning at least ``minimum_results`` hits wins. The
        hits themselves are discarded; only the parameters are returned.

        Args:
            job_posting: Source of the title and primary skills.
            location: Optional dict read for ``country`` and ``state``.
            certifications: Optional dict read for ``name`` and
                ``organization``.
            tags: Optional tag filters, passed through unchanged.
            minimum_skills_match: Skills threshold of the strictest query.
            minimum_results: Number of hits a query must return to be chosen.

        Returns:
            The chosen parameter dict, or the most basic skills-only query
            when no variation returns enough hits.
        """
        params_list = self._generate_param_variations(
            job_posting=job_posting,
            location=location,
            certifications=certifications,
            tags=tags,
            minimum_skills_match=minimum_skills_match,
        )

        # Try each parameter set until we get enough results
        for params in params_list:
            results = self.opensearch_client.search_jobseekers(params)
            if results and len(results) >= minimum_results:
                return params

        # If no parameter set gives enough results, return most basic query
        return self._basic_params(
            [skill.skill_name for skill in job_posting.primary_skills]
        )

    @staticmethod
    def _basic_params(primary_skills: List) -> Dict:
        """Return the most relaxed query: skills only, one skill must match."""
        return {
            "skills": primary_skills,
            "minimum_skills_should_match": 1,  # Most relaxed skills matching
            "size": 100,
            "sort_by": ["score"],
        }

    @staticmethod
    def _without(params: Dict, keys) -> Dict:
        """Return a copy of ``params`` with the given keys removed."""
        return {k: v for k, v in params.items() if k not in keys}

    def _generate_param_variations(self,
                                   job_posting: JobPosting,
                                   location: Optional[Dict] = None,
                                   certifications: Optional[Dict] = None,
                                   tags: Optional[List[Dict]] = None,
                                   minimum_skills_match: int = 3) -> List[Dict]:
        """Generate variations of search parameters from strict to relaxed.

        Relaxation is cumulative, so every variation is no stricter than the
        one before it: all filters -> skills threshold lowered to at most 2
        -> certification filters dropped -> location filters dropped -> tag
        filters dropped -> basic skills-only query. Variations identical to
        an earlier one (e.g. when an optional filter was never supplied) are
        omitted so callers do not repeat the same query.

        Returns:
            Distinct parameter dicts, strictest first, basic query last.
        """
        primary_skills = [skill.skill_name for skill in job_posting.primary_skills]

        # Start with most restrictive parameters
        strict_params = {
            "boolean_search_query": job_posting.title,
            "skills": primary_skills,
            "minimum_skills_should_match": minimum_skills_match,
            "size": 100,
            "sort_by": ["score"],
        }

        # Add additional filters if provided
        if location:
            strict_params.update({
                "country_filter": location.get("country"),
                "state_filter": location.get("state"),
            })
        if certifications:
            strict_params.update({
                "certifications_name": certifications.get("name"),
                "certifications_organization": certifications.get("organization"),
            })
        if tags:
            strict_params["tags"] = tags

        # min() keeps this step from raising the threshold when the caller
        # already asked for fewer than 2 matching skills.
        relaxed_skills = {
            **strict_params,
            "minimum_skills_should_match": min(minimum_skills_match, 2),
        }
        no_certs = self._without(relaxed_skills, self._CERTIFICATION_KEYS)
        no_location = self._without(no_certs, self._LOCATION_KEYS)
        no_tags = self._without(no_location, self._TAG_KEYS)

        candidates = [
            strict_params,    # Try all filters first
            relaxed_skills,   # Relax skills matching
            no_certs,         # ...then also remove cert filters
            no_location,      # ...then also remove location filters
            no_tags,          # ...then also remove tag filters
            self._basic_params(primary_skills),  # Most basic query
        ]

        # Drop repeats while preserving strict-to-relaxed order.
        variations: List[Dict] = []
        for candidate in candidates:
            if candidate not in variations:
                variations.append(candidate)
        return variations