File size: 5,695 Bytes
1cdaa9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d66784b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f351996
d66784b
 
 
 
 
 
 
 
 
 
f351996
 
1cdaa9c
 
d66784b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
from typing import List, Dict, Optional
from opensearch_client import OpenSearchClient
from chroma_storage import ChromaMatchingSystem
from embeddings import JobPosting

class TwoPhaseSearchSystem:
    """Candidate search that filters with OpenSearch, then matches with ChromaDB.

    Phase 1 asks the OpenSearch client for jobseekers that satisfy a set of
    query parameters; phase 2 asks the Chroma matcher for embedding matches
    restricted to the jobseekers returned by phase 1.
    """

    def __init__(self,
                 chroma_matcher: "ChromaMatchingSystem",
                 opensearch_client: "OpenSearchClient"):
        # Collaborator types are quoted forward references: they are only
        # needed by type checkers, not to define this class at runtime.
        self.chroma_matcher = chroma_matcher
        self.opensearch_client = opensearch_client

    def search_candidates(self,
                          job_posting: "JobPosting",
                          search_params: Dict,
                          n_results: int = 10) -> List[Dict]:
        """Run the two-phase search for one job posting.

        1. OpenSearch filtering via ``search_jobseekers(search_params)``.
        2. ChromaDB embedding matching limited to the ids from step 1.

        Args:
            job_posting: Posting forwarded to the Chroma matcher.
            search_params: Query parameters forwarded to the OpenSearch client.
            n_results: Number of matches requested from the Chroma matcher.

        Returns:
            One dict per Chroma match whose ``jobseeker_id`` is also an
            OpenSearch hit, in the order the matcher returned them, with keys
            ``jobseeker_id``, ``similarity_score``, ``field_scores``,
            ``explanation`` and ``opensearch_score`` (the hit's ``_score``,
            or ``0.0`` when absent). Empty list when OpenSearch returns
            nothing.
        """
        # Phase 1: OpenSearch filtering
        opensearch_results = self.opensearch_client.search_jobseekers(search_params)
        if not opensearch_results:
            return []

        # Index the hits once so each match below is an O(1) lookup instead of
        # a scan over every hit. setdefault keeps the FIRST hit for a repeated
        # id, which is what a first-match scan would have found.
        hits_by_id: Dict = {}
        for hit in opensearch_results:
            hits_by_id.setdefault(hit['jobseeker_id'], hit)

        # Phase 2: ChromaDB embedding matching, restricted to the filtered ids
        # (de-duplicated, first-seen order).
        matches = self.chroma_matcher.get_matches(
            job_posting=job_posting,
            n_results=n_results,
            where_conditions={"jobseeker_id": {"$in": list(hits_by_id)}},
        )

        # Combine OpenSearch scores with ChromaDB match results
        final_results = []
        for match in matches:
            hit = hits_by_id.get(match.jobseeker_id)
            if hit is None:
                # The matcher returned an id that did not pass phase 1.
                continue
            final_results.append({
                'jobseeker_id': match.jobseeker_id,
                'similarity_score': match.similarity_score,
                'field_scores': match.field_scores,
                'explanation': match.explanation,
                'opensearch_score': hit.get('_score', 0.0),
            })

        return final_results

    def build_search_params(self,
                            job_posting: "JobPosting",
                            location: Optional[Dict] = None,
                            certifications: Optional[Dict] = None,
                            tags: Optional[List[Dict]] = None,
                            minimum_skills_match: int = 3,
                            minimum_results: int = 10) -> Dict:
        """Pick OpenSearch query parameters, falling back to looser ones.

        Each candidate parameter set from ``_generate_param_variations`` is
        actually executed against the OpenSearch client, in order; the first
        one that yields at least ``minimum_results`` results is returned.

        Args:
            job_posting: Source of the title and primary skills.
            location: Optional dict read for ``country`` and ``state``.
            certifications: Optional dict read for ``name`` and ``organization``.
            tags: Optional tag filters, passed through unchanged.
            minimum_skills_match: Skill-match threshold for the strict query.
            minimum_results: Result count a parameter set must reach.

        Returns:
            The first parameter set that produced enough results, otherwise
            the most basic (skills-only) query.
        """
        params_list = self._generate_param_variations(
            job_posting=job_posting,
            location=location,
            certifications=certifications,
            tags=tags,
            minimum_skills_match=minimum_skills_match,
        )

        # Try each parameter set until we get enough results
        for params in params_list:
            results = self.opensearch_client.search_jobseekers(params)
            if results and len(results) >= minimum_results:
                return params

        # If no parameter set gives enough results, return most basic query
        return self._basic_params(
            [skill.skill_name for skill in job_posting.primary_skills]
        )

    @staticmethod
    def _basic_params(skill_names: List[str]) -> Dict:
        """Return the most basic query: skills only, skill threshold of 1."""
        return {
            "skills": skill_names,
            "minimum_skills_should_match": 1,  # Most relaxed skills matching
            "size": 100,
            "sort_by": ["score"],
        }

    def _generate_param_variations(self,
                                   job_posting: "JobPosting",
                                   location: Optional[Dict] = None,
                                   certifications: Optional[Dict] = None,
                                   tags: Optional[List[Dict]] = None,
                                   minimum_skills_match: int = 3) -> List[Dict]:
        """Generate search-parameter variations, strictest first.

        The order is: all filters; skill threshold lowered to at most 2;
        certification filters dropped; location filters dropped; tag filters
        dropped; the most basic skills-only query. Apart from the final basic
        query, every variation is derived from the strict set by changing ONE
        thing (the relaxations are not cumulative).

        Variations that end up identical to an earlier one (for example
        "drop certifications" when no certifications were given) are omitted,
        so callers do not run the same query twice.

        Returns:
            Distinct parameter dicts in the order described above; the last
            entry is always the most basic query.
        """
        primary_skills = [skill.skill_name for skill in job_posting.primary_skills]

        # Start with most restrictive parameters
        strict_params = {
            "boolean_search_query": job_posting.title,
            "skills": primary_skills,
            "minimum_skills_should_match": minimum_skills_match,
            "size": 100,
            "sort_by": ["score"],
        }

        # Add additional filters if provided
        if location:
            strict_params.update({
                "country_filter": location.get("country"),
                "state_filter": location.get("state"),
            })

        if certifications:
            strict_params.update({
                "certifications_name": certifications.get("name"),
                "certifications_organization": certifications.get("organization"),
            })

        if tags:
            strict_params["tags"] = tags

        def without(*keys: str) -> Dict:
            """Copy of the strict parameters minus the given filter keys."""
            return {k: v for k, v in strict_params.items() if k not in keys}

        candidates = [
            strict_params,  # Try all filters first
            # Relax skills matching; never raise the threshold above what the
            # caller asked for (a requested minimum of 1 must stay 1).
            {**strict_params,
             "minimum_skills_should_match": min(minimum_skills_match, 2)},
            without("certifications_name", "certifications_organization"),
            without("state_filter", "country_filter"),
            without("tags"),
            self._basic_params(primary_skills),  # Most basic query
        ]

        # Drop exact repeats while preserving order.
        variations: List[Dict] = []
        for candidate in candidates:
            if candidate not in variations:
                variations.append(candidate)

        return variations