Spaces:
Sleeping
Sleeping
Commit ·
4b3a33f
1
Parent(s): 84d4394
Implemented clustering
Browse files- Supabase/.temp/cli-latest +1 -1
- Supabase/functions/otp/index.ts +8 -5
- backend/api.py +12 -4
- backend/check_clusters_after_run.py +23 -0
- backend/check_db_clustering.py +33 -0
- backend/check_job_data.py +26 -0
- backend/debug_profile.json +90 -0
- backend/debug_score.py +55 -0
- backend/docs/efficiency_guide.md +41 -0
- backend/final_verify.py +38 -0
- backend/fix_profile_embeddings_trigger.sql +56 -0
- backend/generate_realistic_resumes.py +183 -0
- backend/inspect_columns.py +30 -0
- backend/inspect_schema.py +28 -0
- backend/inspect_schema_fixed.py +34 -0
- backend/out_cmd.txt +20 -0
- backend/realistic_synthetic_resumes.json +0 -0
- backend/remove_triggers_for_profile_embeddings.sql +19 -0
- backend/repair_system_mismatches.sql +104 -0
- backend/requirements.txt +1 -0
- backend/script_output.txt +0 -0
- backend/src/embeddings/benchmark_bge.py +55 -0
- backend/src/embeddings/evaluate_quality.py +197 -0
- backend/src/embeddings/job_embed.py +1 -1
- backend/src/embeddings/match_benchmark_granular.py +228 -0
- backend/src/embeddings/profile_entities_bench.py +115 -0
- backend/src/matching/similarity.py +40 -17
- backend/src/services/clustering_service.py +148 -0
- backend/src/services/test_clustering.py +21 -0
- backend/src/services/verify_labels.py +41 -0
- backend/supabase_ingest.py +9 -4
- backend/test_ingest_output.txt +0 -0
- debug_log.txt +15 -0
- entity_benchmark_scaled_results.txt +12 -0
- experimental_results.tex +53 -0
- match_benchmark_results.json +22 -0
- matching_analysis_report.md +0 -0
- quality_metrics_adversarial.json +6 -0
- schema_dump.txt +22 -0
- src/components/Admin/AdminLayout.jsx +36 -34
- src/components/Admin/TalentClusters.jsx +496 -0
- src/components/JobListings.jsx +36 -36
- src/pages/Admindashboard.jsx +13 -10
- src/pages/ApplicantProfile.jsx +17 -6
- system_architecture.txt +66 -0
Supabase/.temp/cli-latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
v2.
|
|
|
|
| 1 |
+
v2.75.0
|
Supabase/functions/otp/index.ts
CHANGED
|
@@ -6,7 +6,7 @@ const corsHeaders = {
|
|
| 6 |
'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type',
|
| 7 |
};
|
| 8 |
|
| 9 |
-
serve(async (req) => {
|
| 10 |
if (req.method === 'OPTIONS') {
|
| 11 |
return new Response('ok', { headers: corsHeaders });
|
| 12 |
}
|
|
@@ -35,6 +35,7 @@ serve(async (req) => {
|
|
| 35 |
// ACTION: SEND SMS (VIA TWILIO)
|
| 36 |
// ==========================================
|
| 37 |
if (action === 'send') {
|
|
|
|
| 38 |
const { data: profile } = await supabaseAdmin
|
| 39 |
.from('profiles')
|
| 40 |
.select('phone')
|
|
@@ -88,10 +89,10 @@ serve(async (req) => {
|
|
| 88 |
console.error("Twilio Error:", errorText);
|
| 89 |
throw new Error("Failed to send SMS. Check server logs.");
|
| 90 |
}
|
| 91 |
-
/
|
| 92 |
|
| 93 |
return new Response(
|
| 94 |
-
JSON.stringify({ message: "OTP sent successfully" }),
|
| 95 |
{ headers: { ...corsHeaders, 'Content-Type': 'application/json' }, status: 200 }
|
| 96 |
);
|
| 97 |
}
|
|
@@ -100,6 +101,7 @@ serve(async (req) => {
|
|
| 100 |
// ACTION: VERIFY
|
| 101 |
// ==========================================
|
| 102 |
if (action === 'verify') {
|
|
|
|
| 103 |
if (!userCode) throw new Error("Missing OTP code");
|
| 104 |
|
| 105 |
const { data: profile } = await supabaseAdmin.from('profiles').select('phone').eq('id', user.id).single();
|
|
@@ -119,16 +121,17 @@ serve(async (req) => {
|
|
| 119 |
// Success
|
| 120 |
await supabaseAdmin.from('profiles').update({ is_phone_verified: true }).eq('id', user.id);
|
| 121 |
await supabaseAdmin.from('otp_verifications').delete().eq('phone', phone);
|
|
|
|
| 122 |
|
| 123 |
return new Response(
|
| 124 |
-
JSON.stringify({ message: "Phone verified successfully!" }),
|
| 125 |
{ headers: { ...corsHeaders, 'Content-Type': 'application/json' }, status: 200 }
|
| 126 |
);
|
| 127 |
}
|
| 128 |
|
| 129 |
return new Response(JSON.stringify({ error: "Invalid Action" }), { status: 400, headers: corsHeaders });
|
| 130 |
|
| 131 |
-
} catch (error) {
|
| 132 |
return new Response(
|
| 133 |
JSON.stringify({ error: error.message }),
|
| 134 |
{ headers: { ...corsHeaders, 'Content-Type': 'application/json' }, status: 400 }
|
|
|
|
| 6 |
'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type',
|
| 7 |
};
|
| 8 |
|
| 9 |
+
serve(async (req: Request) => {
|
| 10 |
if (req.method === 'OPTIONS') {
|
| 11 |
return new Response('ok', { headers: corsHeaders });
|
| 12 |
}
|
|
|
|
| 35 |
// ACTION: SEND SMS (VIA TWILIO)
|
| 36 |
// ==========================================
|
| 37 |
if (action === 'send') {
|
| 38 |
+
/** // Logic commented out to disable phone verification
|
| 39 |
const { data: profile } = await supabaseAdmin
|
| 40 |
.from('profiles')
|
| 41 |
.select('phone')
|
|
|
|
| 89 |
console.error("Twilio Error:", errorText);
|
| 90 |
throw new Error("Failed to send SMS. Check server logs.");
|
| 91 |
}
|
| 92 |
+
**/
|
| 93 |
|
| 94 |
return new Response(
|
| 95 |
+
JSON.stringify({ message: "OTP sent successfully (Verification disabled)" }),
|
| 96 |
{ headers: { ...corsHeaders, 'Content-Type': 'application/json' }, status: 200 }
|
| 97 |
);
|
| 98 |
}
|
|
|
|
| 101 |
// ACTION: VERIFY
|
| 102 |
// ==========================================
|
| 103 |
if (action === 'verify') {
|
| 104 |
+
/** // Logic commented out to disable phone verification
|
| 105 |
if (!userCode) throw new Error("Missing OTP code");
|
| 106 |
|
| 107 |
const { data: profile } = await supabaseAdmin.from('profiles').select('phone').eq('id', user.id).single();
|
|
|
|
| 121 |
// Success
|
| 122 |
await supabaseAdmin.from('profiles').update({ is_phone_verified: true }).eq('id', user.id);
|
| 123 |
await supabaseAdmin.from('otp_verifications').delete().eq('phone', phone);
|
| 124 |
+
**/
|
| 125 |
|
| 126 |
return new Response(
|
| 127 |
+
JSON.stringify({ message: "Phone verified successfully! (Verification disabled)" }),
|
| 128 |
{ headers: { ...corsHeaders, 'Content-Type': 'application/json' }, status: 200 }
|
| 129 |
);
|
| 130 |
}
|
| 131 |
|
| 132 |
return new Response(JSON.stringify({ error: "Invalid Action" }), { status: 400, headers: corsHeaders });
|
| 133 |
|
| 134 |
+
} catch (error: any) {
|
| 135 |
return new Response(
|
| 136 |
JSON.stringify({ error: error.message }),
|
| 137 |
{ headers: { ...corsHeaders, 'Content-Type': 'application/json' }, status: 400 }
|
backend/api.py
CHANGED
|
@@ -262,18 +262,26 @@ async def perform_candidate_analysis(candidate_id: str, job_id: str, force_refre
|
|
| 262 |
|
| 263 |
# 6. Persist to Database
|
| 264 |
try:
|
|
|
|
| 265 |
data_to_save = {
|
| 266 |
"ai_summary": ai_insights.get("summary"),
|
| 267 |
"ai_insights": {
|
| 268 |
"weaknesses": ai_insights.get("weaknesses") or [],
|
| 269 |
"missing_skills": missing,
|
| 270 |
-
"score_breakdown":
|
| 271 |
},
|
| 272 |
-
"AI_score": ai_insights.get("score") or 0,
|
| 273 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
}
|
| 275 |
client.table("applications").update(data_to_save).eq("user_id", candidate_id).eq("job_id", job_id).execute()
|
| 276 |
-
print(f"💾 Persisted AI analysis for candidate {candidate_id}")
|
| 277 |
except Exception as db_err:
|
| 278 |
print(f"⚠️ Failed to persist AI analysis: {db_err}")
|
| 279 |
|
|
|
|
| 262 |
|
| 263 |
# 6. Persist to Database
|
| 264 |
try:
|
| 265 |
+
breakdown = semantic_result.get("breakdown") or {}
|
| 266 |
data_to_save = {
|
| 267 |
"ai_summary": ai_insights.get("summary"),
|
| 268 |
"ai_insights": {
|
| 269 |
"weaknesses": ai_insights.get("weaknesses") or [],
|
| 270 |
"missing_skills": missing,
|
| 271 |
+
"score_breakdown": breakdown
|
| 272 |
},
|
| 273 |
+
"AI_score": int(ai_insights.get("score") or 0),
|
| 274 |
+
"match_score": int(semantic_result.get("total_score") or 0),
|
| 275 |
+
# Granular Scores mapping to table columns
|
| 276 |
+
"skills_match": int(breakdown.get("skills", 0)),
|
| 277 |
+
"technical_skills_match": int(breakdown.get("technical_skills", 0)),
|
| 278 |
+
"work_experience_match": int(breakdown.get("experience", 0)),
|
| 279 |
+
"education_match": int(breakdown.get("education", 0)),
|
| 280 |
+
"certifications_match": int(breakdown.get("certifications", 0)),
|
| 281 |
+
"project_match": int(breakdown.get("projects", 0))
|
| 282 |
}
|
| 283 |
client.table("applications").update(data_to_save).eq("user_id", candidate_id).eq("job_id", job_id).execute()
|
| 284 |
+
print(f"💾 Persisted AI analysis and granular scores for candidate {candidate_id}")
|
| 285 |
except Exception as db_err:
|
| 286 |
print(f"⚠️ Failed to persist AI analysis: {db_err}")
|
| 287 |
|
backend/check_clusters_after_run.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import os
|
| 3 |
+
from supabase import create_client
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
|
| 6 |
+
load_dotenv()
|
| 7 |
+
|
| 8 |
+
SUPABASE_URL = os.environ.get("SUPABASE_URL")
|
| 9 |
+
SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
|
| 10 |
+
client = create_client(SUPABASE_URL, SUPABASE_KEY)
|
| 11 |
+
|
| 12 |
+
async def check_clusters():
|
| 13 |
+
res = client.table("profiles").select("id, cluster_label").limit(10).execute()
|
| 14 |
+
if not res.data:
|
| 15 |
+
print("No profiles found")
|
| 16 |
+
return
|
| 17 |
+
|
| 18 |
+
print("Sample Cluster Labels:")
|
| 19 |
+
for row in res.data:
|
| 20 |
+
print(f" - ID: {row['id']} | Label: {row['cluster_label']}")
|
| 21 |
+
|
| 22 |
+
if __name__ == "__main__":
|
| 23 |
+
asyncio.run(check_clusters())
|
backend/check_db_clustering.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from supabase import create_client, Client
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
load_dotenv()
|
| 6 |
+
|
| 7 |
+
url = os.environ.get("SUPABASE_URL")
|
| 8 |
+
key = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
|
| 9 |
+
client: Client = create_client(url, key)
|
| 10 |
+
|
| 11 |
+
def check_clustering_status():
|
| 12 |
+
print("Checking profiles table for cluster labels...")
|
| 13 |
+
resp = client.table("profiles").select("id, cluster_label").limit(20).execute()
|
| 14 |
+
data = resp.data
|
| 15 |
+
|
| 16 |
+
if not data:
|
| 17 |
+
print("No profiles found.")
|
| 18 |
+
return
|
| 19 |
+
|
| 20 |
+
# Count how many have labels
|
| 21 |
+
labeled = [d for d in data if d.get("cluster_label")]
|
| 22 |
+
print(f"Sample size: {len(data)}")
|
| 23 |
+
print(f"Profiles with cluster_label: {len(labeled)}")
|
| 24 |
+
|
| 25 |
+
if labeled:
|
| 26 |
+
print("Sample labels:")
|
| 27 |
+
for d in labeled[:5]:
|
| 28 |
+
print(f" - {d['id']}: {d['cluster_label']}")
|
| 29 |
+
else:
|
| 30 |
+
print("No profiles have cluster labels in this sample.")
|
| 31 |
+
|
| 32 |
+
if __name__ == "__main__":
|
| 33 |
+
check_clustering_status()
|
backend/check_job_data.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import os
|
| 3 |
+
from supabase import create_client
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
|
| 6 |
+
load_dotenv()
|
| 7 |
+
|
| 8 |
+
SUPABASE_URL = os.environ.get("SUPABASE_URL")
|
| 9 |
+
SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
|
| 10 |
+
client = create_client(SUPABASE_URL, SUPABASE_KEY)
|
| 11 |
+
|
| 12 |
+
async def check_job():
|
| 13 |
+
job_id = "45bcca29-4e12-45bf-97d4-0b77ff55472f"
|
| 14 |
+
res = client.table("job_embeddings").select("*").eq("job_id", job_id).execute()
|
| 15 |
+
if not res.data:
|
| 16 |
+
print("Job not found in job_embeddings")
|
| 17 |
+
return
|
| 18 |
+
|
| 19 |
+
data = res.data[0]
|
| 20 |
+
print(f"Data for Job {job_id}:")
|
| 21 |
+
for k, v in data.items():
|
| 22 |
+
if k in ['job_id', 'created_at', 'updated_at']: continue
|
| 23 |
+
print(f" - {k}: {'POPULATED' if v else 'NULL'}")
|
| 24 |
+
|
| 25 |
+
if __name__ == "__main__":
|
| 26 |
+
asyncio.run(check_job())
|
backend/debug_profile.json
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"id": "a29ba56a-0d5b-4bc9-9a15-e314f6447260",
|
| 3 |
+
"updated_at": "2026-01-30T06:38:28.185688+00:00",
|
| 4 |
+
"full_name": null,
|
| 5 |
+
"role": "applicant",
|
| 6 |
+
"company_id": null,
|
| 7 |
+
"avatar_url": null,
|
| 8 |
+
"resume_url": "a29ba56a-0d5b-4bc9-9a15-e314f6447260/1769755098632_resume_ey.pdf",
|
| 9 |
+
"location": null,
|
| 10 |
+
"headline": "Final-year Computer Science student hands-on experience Machine Learning. Skilled React, Python, Flask, Supabase (PostgreSQL). Built ATS-style resume screening tools stock price prediction apps.",
|
| 11 |
+
"summary": null,
|
| 12 |
+
"skills": [
|
| 13 |
+
"Artificial Intelligence",
|
| 14 |
+
"Machine Learning",
|
| 15 |
+
"Communication",
|
| 16 |
+
"Team Work",
|
| 17 |
+
"Problem Solving",
|
| 18 |
+
"Conflict resolution"
|
| 19 |
+
],
|
| 20 |
+
"work_experience": [
|
| 21 |
+
{
|
| 22 |
+
"role": "AI/ML Intern",
|
| 23 |
+
"years": "June 2025",
|
| 24 |
+
"company": "ICT Academy Kerala",
|
| 25 |
+
"duration": "1 month",
|
| 26 |
+
"description": "Completed 1-month internship focused Artificial Intelligence Machine Learning. CreatedandassessedMLmodelsonreal-worlddatasets;improvedvalidationaccuracyafterfeatureengineering hyperparameter tuning."
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"role": "Django Intern",
|
| 30 |
+
"years": "Sept 2023",
|
| 31 |
+
"company": "Neo Green Labs",
|
| 32 |
+
"duration": null,
|
| 33 |
+
"description": "Delivered Django API endpoints (CRUD) connected relational database. Supported REST API implementation production use cases."
|
| 34 |
+
}
|
| 35 |
+
],
|
| 36 |
+
"education": [
|
| 37 |
+
{
|
| 38 |
+
"year": "Nov 2022 Ongoing",
|
| 39 |
+
"course": "B.Tech Computer Science",
|
| 40 |
+
"institution": "APJ Abdul Technological University"
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"year": "Jun 2020 Mar 2022",
|
| 44 |
+
"course": "Higher Secondary Education",
|
| 45 |
+
"institution": "Carmel College Engineering Technology"
|
| 46 |
+
}
|
| 47 |
+
],
|
| 48 |
+
"phone": "+91 8921173593",
|
| 49 |
+
"current_position": null,
|
| 50 |
+
"address": null,
|
| 51 |
+
"linkedin": null,
|
| 52 |
+
"github": null,
|
| 53 |
+
"portfolio": null,
|
| 54 |
+
"experience_years": null,
|
| 55 |
+
"certifications": "ICT Academy Kerala (2025), The Joy Of Computing Using Python (Elite Rank), NPTEL (2025)",
|
| 56 |
+
"technical_skills": "Python, Java, C, SQL, React, Flask, Supabase, PostgreSQL, Django, XGBoost, LSTM",
|
| 57 |
+
"languages": null,
|
| 58 |
+
"professional_references": null,
|
| 59 |
+
"desired_salary": null,
|
| 60 |
+
"industry_experience": null,
|
| 61 |
+
"career_goals": null,
|
| 62 |
+
"willing_to_relocate": false,
|
| 63 |
+
"available_remote": false,
|
| 64 |
+
"processed": true,
|
| 65 |
+
"file_hash": "58406de4a011cd48192fe9e8a8e93e0255263632344bec40629deb639b54e847",
|
| 66 |
+
"projects": [
|
| 67 |
+
{
|
| 68 |
+
"title": "CV Ordering And Numbering Application",
|
| 69 |
+
"description": "Implemented automated CV filtering, ranking, clustering using job-specific criteria skill similarity algorithms. Built role-based access control system administrators, recruiters, applicants ensure secure streamlined workflows. Added PDF Excel report generation, Systematized email notifications, ATS-compatible resume for- matting. Architected platform emphasis scalability, data privacy, user-centric design improve recruiter efficiency candidate experience.",
|
| 70 |
+
"technologies_used": [
|
| 71 |
+
"React",
|
| 72 |
+
"Vite",
|
| 73 |
+
"Supabase"
|
| 74 |
+
]
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"title": "Stock Price Prediction System",
|
| 78 |
+
"description": "Built React + Flask web app forecast stock prices using historical OHLCV data. TrainedandbenchmarkedXGBoostandLSTMmodels;servedpredictionsthroughRESTAPIsanddisplayed trends. Created responsive UI entering stock symbols comparing predicted vs. actual price trends; trained models Google Colab delivered Matplotlib plots Flask backend.",
|
| 79 |
+
"technologies_used": [
|
| 80 |
+
"React",
|
| 81 |
+
"Flask",
|
| 82 |
+
"Python"
|
| 83 |
+
]
|
| 84 |
+
}
|
| 85 |
+
],
|
| 86 |
+
"email": null,
|
| 87 |
+
"is_phone_verified": false,
|
| 88 |
+
"ai_score": 0,
|
| 89 |
+
"cluster_label": null
|
| 90 |
+
}
|
backend/debug_score.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
from supabase import create_client
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
from src.matching.similarity import calculate_granular_match_score
|
| 7 |
+
|
| 8 |
+
load_dotenv()
|
| 9 |
+
|
| 10 |
+
SUPABASE_URL = os.environ.get("SUPABASE_URL")
|
| 11 |
+
SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
|
| 12 |
+
client = create_client(SUPABASE_URL, SUPABASE_KEY)
|
| 13 |
+
|
| 14 |
+
async def run_test():
|
| 15 |
+
res = client.table("applications").select("user_id, job_id").limit(1).execute()
|
| 16 |
+
if not res.data:
|
| 17 |
+
print("No apps found")
|
| 18 |
+
return
|
| 19 |
+
|
| 20 |
+
c_id = res.data[0]["user_id"]
|
| 21 |
+
j_id = res.data[0]["job_id"]
|
| 22 |
+
|
| 23 |
+
# Raw fetch
|
| 24 |
+
p_emb = client.table("profile_embeddings").select("*").eq("id", c_id).execute().data[0]
|
| 25 |
+
j_emb = client.table("job_embeddings").select("*").eq("job_id", j_id).execute().data[0]
|
| 26 |
+
|
| 27 |
+
log = []
|
| 28 |
+
log.append(f"Testing {c_id} against {j_id}")
|
| 29 |
+
|
| 30 |
+
def get_len(v):
|
| 31 |
+
if v is None: return "None"
|
| 32 |
+
if isinstance(v, str):
|
| 33 |
+
try:
|
| 34 |
+
# Approximate len by comma count
|
| 35 |
+
return v.count(',') + 1
|
| 36 |
+
except: return "StringError"
|
| 37 |
+
return len(v)
|
| 38 |
+
|
| 39 |
+
log.append("\n--- Profile Lengths ---")
|
| 40 |
+
for k in ['skills', 'technical_skills', 'experience', 'certifications']:
|
| 41 |
+
log.append(f"{k}: {get_len(p_emb.get(k))}")
|
| 42 |
+
|
| 43 |
+
log.append("\n--- Job Lengths ---")
|
| 44 |
+
for k in ['skills', 'technical_skills', 'work_experience', 'certifications']:
|
| 45 |
+
log.append(f"{k}: {get_len(j_emb.get(k))}")
|
| 46 |
+
|
| 47 |
+
result = await calculate_granular_match_score(client, c_id, j_id)
|
| 48 |
+
log.append(f"\nResult: {json.dumps(result)}")
|
| 49 |
+
|
| 50 |
+
with open("debug_log.txt", "w") as f:
|
| 51 |
+
f.write("\n".join(log))
|
| 52 |
+
print("Logged to debug_log.txt")
|
| 53 |
+
|
| 54 |
+
if __name__ == "__main__":
|
| 55 |
+
asyncio.run(run_test())
|
backend/docs/efficiency_guide.md
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# BGE-M3 Efficiency Guide
|
| 2 |
+
|
| 3 |
+
This guide explains how to measure and optimize the efficiency of the BAAI/bge-m3 model used in the IRIS project.
|
| 4 |
+
|
| 5 |
+
## 1. Key Metrics
|
| 6 |
+
|
| 7 |
+
### Performance (Infrastructure)
|
| 8 |
+
- **Latency**: Time taken to generate an embedding for a single text. Critical for real-time search.
|
| 9 |
+
- **Throughput**: Number of documents processed per second. Important for batch processing (e.g., initial profile indexing).
|
| 10 |
+
- **VRAM/RAM Usage**: Memory footprint of the model. BGE-M3 is ~2.2GB in FP32.
|
| 11 |
+
|
| 12 |
+
### Retrieval Quality (Accuracy)
|
| 13 |
+
- **Precision@K**: The proportion of relevant candidates in the top K results.
|
| 14 |
+
* *Example*: If you return 10 candidates and 3 are actually qualified, Precision@10 = 30%.
|
| 15 |
+
- **Recall@K** (Correlation to User's "callback"): The proportion of total relevant candidates that were successfully captured in the top K.
|
| 16 |
+
* *Example*: If there are 5 qualified candidates in the database and your search finds 4 of them in the top 10, Recall@10 = 80%.
|
| 17 |
+
- **MRR (Mean Reciprocal Rank)**: Evaluates how high the first relevant candidate is ranked.
|
| 18 |
+
* *Formula*: $1 / Rank$. If the best candidate is at position #1, score is 1.0. If at #2, score is 0.5.
|
| 19 |
+
- **NDCG (Normalized Discounted Cumulative Gain)**: Measures the overall quality of the ranking order, giving more weight to highly relevant results at the very top.
|
| 20 |
+
|
| 21 |
+
## 2. BGE-M3 Specific Features
|
| 22 |
+
|
| 23 |
+
BGE-M3 is a "multi-function" model. You can measure efficiency across three modes:
|
| 24 |
+
1. **Dense Retrieval**: Standard 1024d vectors. Fast and semantic.
|
| 25 |
+
2. **Sparse Retrieval (Lexical)**: Similar to BM25 but learned. More efficient for exact keyword matching.
|
| 26 |
+
3. **Multi-Vector (ColBERT style)**: Most accurate but highest storage and latency cost.
|
| 27 |
+
|
| 28 |
+
## 3. Optimization Techniques
|
| 29 |
+
|
| 30 |
+
### Precision Tuning
|
| 31 |
+
- **FP16**: Use `model.half()` if on GPU to double speed and halve memory with negligible accuracy loss.
|
| 32 |
+
- **Quantization**: Int8 or GGUF formats can reduce memory usage by 4x.
|
| 33 |
+
|
| 34 |
+
### Batching
|
| 35 |
+
Using optimal batch sizes (e.g., 16-32) significantly improves throughput compared to single-sentence processing.
|
| 36 |
+
|
| 37 |
+
## 4. Measuring Quality in IRIS
|
| 38 |
+
To measure quality, create a "Golden Dataset" of (Job Description, Relevant Profiles) and calculate Hit Rate:
|
| 39 |
+
1. Fetch top 10 profiles for a job.
|
| 40 |
+
2. Check if the "ideal" candidate is in that list.
|
| 41 |
+
3. Average this over 50 test cases.
|
backend/final_verify.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
from supabase import create_client
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
from api import perform_candidate_analysis
|
| 7 |
+
|
| 8 |
+
load_dotenv()
|
| 9 |
+
|
| 10 |
+
SUPABASE_URL = os.environ.get("SUPABASE_URL")
|
| 11 |
+
SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
|
| 12 |
+
client = create_client(SUPABASE_URL, SUPABASE_KEY)
|
| 13 |
+
|
| 14 |
+
async def verify():
|
| 15 |
+
res = client.table("applications").select("user_id, job_id").limit(1).execute()
|
| 16 |
+
if not res.data:
|
| 17 |
+
print("No apps found")
|
| 18 |
+
return
|
| 19 |
+
|
| 20 |
+
c_id = res.data[0]["user_id"]
|
| 21 |
+
j_id = res.data[0]["job_id"]
|
| 22 |
+
|
| 23 |
+
print(f"Triggering fresh analysis for {c_id} / {j_id}")
|
| 24 |
+
await perform_candidate_analysis(c_id, j_id, force_refresh=True)
|
| 25 |
+
|
| 26 |
+
print("\nChecking resulting record in DB:")
|
| 27 |
+
final_res = client.table("applications") \
|
| 28 |
+
.select("match_score, skills_match, technical_skills_match, work_experience_match, education_match, certifications_match, project_match") \
|
| 29 |
+
.eq("user_id", c_id).eq("job_id", j_id) \
|
| 30 |
+
.execute()
|
| 31 |
+
|
| 32 |
+
if final_res.data:
|
| 33 |
+
print(json.dumps(final_res.data[0], indent=2))
|
| 34 |
+
else:
|
| 35 |
+
print("Record not found after update")
|
| 36 |
+
|
| 37 |
+
if __name__ == "__main__":
|
| 38 |
+
asyncio.run(verify())
|
backend/fix_profile_embeddings_trigger.sql
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-- fix_profile_embeddings_trigger.sql
|
| 2 |
+
-- Run this in your Supabase SQL Editor to fully resolve the "j_emb" error!
|
| 3 |
+
|
| 4 |
+
-- 1. Redefine the function used by the trigger that refreshes recommendations
|
| 5 |
+
-- The error "record j_emb has no field experience" was likely deeply cached in this logic
|
| 6 |
+
CREATE OR REPLACE FUNCTION public.trg_refresh_recommendations_for_user()
|
| 7 |
+
RETURNS trigger
|
| 8 |
+
LANGUAGE plpgsql
|
| 9 |
+
AS $function$
|
| 10 |
+
DECLARE
|
| 11 |
+
j_id uuid;
|
| 12 |
+
match_res json;
|
| 13 |
+
BEGIN
|
| 14 |
+
-- First clear out old recommendations for this user
|
| 15 |
+
DELETE FROM public.job_recommendations WHERE user_id = NEW.id;
|
| 16 |
+
|
| 17 |
+
-- Iterate through all existing job embeddings
|
| 18 |
+
FOR j_id IN SELECT job_id FROM public.job_embeddings LOOP
|
| 19 |
+
|
| 20 |
+
-- Call the fixed match_profile_job function
|
| 21 |
+
match_res := public.match_profile_job(NEW.id, j_id);
|
| 22 |
+
|
| 23 |
+
-- Only insert if there's an actual match > 0
|
| 24 |
+
IF (match_res->>'match_score')::int > 0 THEN
|
| 25 |
+
INSERT INTO public.job_recommendations (
|
| 26 |
+
user_id, job_id, match_score, skills_match, technical_skills_match,
|
| 27 |
+
work_experience_match, education_match, certifications_match, project_match
|
| 28 |
+
) VALUES (
|
| 29 |
+
NEW.id, j_id,
|
| 30 |
+
(match_res->>'match_score')::int,
|
| 31 |
+
(match_res->>'skills_match')::int,
|
| 32 |
+
(match_res->>'technical_skills_match')::int,
|
| 33 |
+
(match_res->>'work_experience_match')::int,
|
| 34 |
+
(match_res->>'education_match')::int,
|
| 35 |
+
(match_res->>'certifications_match')::int,
|
| 36 |
+
(match_res->>'project_match')::int
|
| 37 |
+
);
|
| 38 |
+
END IF;
|
| 39 |
+
|
| 40 |
+
END LOOP;
|
| 41 |
+
|
| 42 |
+
RETURN NEW;
|
| 43 |
+
END;
|
| 44 |
+
$function$;
|
| 45 |
+
|
| 46 |
+
-- 2. Drop the redundant webhook trigger since you only need the recommendation refresh
|
| 47 |
+
-- Having both might cause race conditions or unnecessary webhooks
|
| 48 |
+
DROP TRIGGER IF EXISTS on_profile_embedding_upsert ON public.profile_embeddings;
|
| 49 |
+
|
| 50 |
+
-- 3. Ensure the embedding refresh trigger is properly attached
|
| 51 |
+
DROP TRIGGER IF EXISTS on_profile_embedding_change ON public.profile_embeddings;
|
| 52 |
+
|
| 53 |
+
CREATE TRIGGER on_profile_embedding_change
|
| 54 |
+
AFTER INSERT OR UPDATE ON public.profile_embeddings
|
| 55 |
+
FOR EACH ROW
|
| 56 |
+
EXECUTE FUNCTION trg_refresh_recommendations_for_user();
|
backend/generate_realistic_resumes.py
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import random
|
| 3 |
+
import uuid
|
| 4 |
+
import datetime
|
| 5 |
+
try:
|
| 6 |
+
from faker import Faker
|
| 7 |
+
except ImportError:
|
| 8 |
+
print("Faker not found. Please install it with: pip install Faker")
|
| 9 |
+
exit(1)
|
| 10 |
+
|
| 11 |
+
fake = Faker()
|
| 12 |
+
|
| 13 |
+
# ---------------------------------------------------------
|
| 14 |
+
# CONSTANTS & DICTIONARIES
|
| 15 |
+
# ---------------------------------------------------------
|
| 16 |
+
|
| 17 |
+
SOFT_SKILLS = [
|
| 18 |
+
"Communication", "Teamwork", "Adaptability", "Analytical Thinking", "Problem Solving",
|
| 19 |
+
"Leadership", "Time Management", "Critical Thinking", "Empathy", "Conflict Resolution",
|
| 20 |
+
"Creativity", "Attention to Detail", "Work Ethic", "Interpersonal Skills", "Emotional Intelligence"
|
| 21 |
+
]
|
| 22 |
+
|
| 23 |
+
TECH_SKILLS = [
|
| 24 |
+
"Python", "Java", "C++", "C#", "JavaScript", "TypeScript", "React", "Angular", "Vue.js",
|
| 25 |
+
"Node.js", "Express", "Django", "Flask", "Spring Boot", "SQL", "PostgreSQL", "MySQL",
|
| 26 |
+
"MongoDB", "AWS", "Azure", "GCP", "Docker", "Kubernetes", "Git", "TensorFlow", "PyTorch",
|
| 27 |
+
"Pandas", "NumPy", "Scikit-learn", "HTML", "CSS", "Bash", "Linux"
|
| 28 |
+
]
|
| 29 |
+
|
| 30 |
+
ROLES = [
|
| 31 |
+
"Software Engineer", "Frontend Developer", "Backend Developer", "Full Stack Developer",
|
| 32 |
+
"Data Scientist", "Machine Learning Engineer", "DevOps Engineer", "Cloud Architect",
|
| 33 |
+
"System Administrator", "Database Administrator", "QA Engineer", "Product Manager"
|
| 34 |
+
]
|
| 35 |
+
|
| 36 |
+
DEGREES = [
|
| 37 |
+
"B.Tech in Computer Science and Engineering",
|
| 38 |
+
"B.S. in Computer Science",
|
| 39 |
+
"M.S. in Software Engineering",
|
| 40 |
+
"B.A. in Information Technology",
|
| 41 |
+
"M.Tech in Data Science",
|
| 42 |
+
"B.S. in Electrical Engineering",
|
| 43 |
+
"Bootcamp Graduate in Web Development"
|
| 44 |
+
]
|
| 45 |
+
|
| 46 |
+
CERTIFICATIONS = [
|
| 47 |
+
"AWS Certified Solutions Architect", "Google Cloud Professional Data Engineer",
|
| 48 |
+
"Certified Kubernetes Administrator (CKA)", "Cisco Certified Network Associate (CCNA)",
|
| 49 |
+
"Microsoft Certified: Azure Administrator Associate", "CompTIA Security+",
|
| 50 |
+
"Deep Learning Specialization (Coursera)", "Oracle Certified Professional Java SE Programmer"
|
| 51 |
+
]
|
| 52 |
+
|
| 53 |
+
# ---------------------------------------------------------
|
| 54 |
+
# GENERATION LOGIC
|
| 55 |
+
# ---------------------------------------------------------
|
| 56 |
+
|
| 57 |
+
def generate_education():
|
| 58 |
+
edu_list = []
|
| 59 |
+
# Always a bachelor/masters
|
| 60 |
+
year_start = random.randint(2015, 2022)
|
| 61 |
+
course = random.choice(DEGREES)
|
| 62 |
+
institution = fake.company() + " University"
|
| 63 |
+
year = f"{year_start} - {year_start + 4}"
|
| 64 |
+
|
| 65 |
+
edu_list.append({
|
| 66 |
+
"course": course,
|
| 67 |
+
"institution": institution,
|
| 68 |
+
"year": year
|
| 69 |
+
})
|
| 70 |
+
|
| 71 |
+
# Sometimes high school
|
| 72 |
+
if random.random() > 0.5:
|
| 73 |
+
edu_list.append({
|
| 74 |
+
"course": "Higher Secondary Education",
|
| 75 |
+
"institution": f"{fake.city()} High School",
|
| 76 |
+
"year": f"{year_start - 2} - {year_start}"
|
| 77 |
+
})
|
| 78 |
+
|
| 79 |
+
return edu_list
|
| 80 |
+
|
| 81 |
+
def generate_work_experience(role):
    """Fabricate 1-3 work-history entries for one synthetic candidate.

    Args:
        role: Preferred job title; each entry keeps it ~70% of the time,
              otherwise a random title from ROLES is substituted.

    Returns:
        list[dict]: Entries with keys ``role``, ``company``, ``years`` and
        ``description``, newest first — the first entry ends "Present",
        each earlier job ends before the previous one began.
    """
    exp_list = []
    num_jobs = random.randint(1, 3)
    current_year = 2026

    for _ in range(num_jobs):
        start_year = current_year - random.randint(1, 3)
        # Only the first (most recent) job is ongoing; for it current_year
        # is still 2026, so the end date renders as "Present".
        duration = f"{fake.month_name()[:3]} {start_year} - " + (f"{fake.month_name()[:3]} {current_year}" if current_year < 2026 else "Present")

        # Descriptions with actual tech context
        action = random.choice(["Developed", "Maintained", "Architected", "Optimized", "Spearheaded", "Collaborated on"])
        project = random.choice(["a scalable microservices architecture", "a responsive web application", "a high-throughput data pipeline", "an internal dashboard", "a machine learning model"])
        impact = random.choice(["reducing latency by 30%.", "increasing user engagement by 15%.", "saving $10k annually.", "improving deployment speed."])

        # FIX: every `impact` string already ends with a period, so the
        # template must not add another one (previously produced "...30%..").
        description = f"{action} {project} {impact} Worked within an Agile framework to deliver features on schedule."

        exp_list.append({
            "role": role if random.random() > 0.3 else random.choice(ROLES),
            "company": fake.company(),
            "years": duration,
            "description": description
        })
        # Walk backwards in time so the next (older) job ends before this
        # one started.
        current_year = start_year - 1

    return exp_list
|
| 106 |
+
|
| 107 |
+
def generate_projects(tech_pool):
    """Fabricate 1-3 project entries drawn from the candidate's own stack.

    Args:
        tech_pool: List of technology names; each project samples 2-4 of
                   them (capped at the pool size) for its tech stack.

    Returns:
        list[dict]: Entries with keys ``tech_stack`` (list of str) and
        ``description``.
    """
    proj_list = []
    num_proj = random.randint(1, 3)

    for _ in range(num_proj):
        p_tech = random.sample(tech_pool, k=min(len(tech_pool), random.randint(2, 4)))
        # fake.bs() / fake.catch_phrase() supply buzzword-style filler text.
        desc = f"Built a {fake.bs()} platform using {', '.join(p_tech)}. Implemented {fake.catch_phrase().lower()} to solve real-world industry challenges."

        proj_list.append({
            "tech_stack": p_tech,
            "description": desc
        })

    return proj_list
|
| 121 |
+
|
| 122 |
+
def build_candidate():
    """Assemble one complete synthetic applicant payload.

    The field formats deliberately mirror the production ingest shape:
    soft skills are kept as a list, while technical skills and
    certifications are comma-joined strings. ``id`` is a fresh UUID and
    doubles as the folder segment of ``resume_url``.

    Returns:
        dict: A profile-row-shaped payload (summary, contact info, skills,
        education, work experience, projects, certifications).
    """
    user_id = str(uuid.uuid4())
    role = random.choice(ROLES)

    # 1. SOFT SKILLS (LIST)
    cand_soft_skills = random.sample(SOFT_SKILLS, k=random.randint(3, 6))

    # 2. TECH SKILLS (COMMA STRING LIKE IN DEBUG_PAYLOAD)
    # Minimum of 6 guarantees the summary's indexing of the first three
    # tech skills below is always safe.
    cand_tech_list = random.sample(TECH_SKILLS, k=random.randint(6, 12))
    cand_tech_string = ", ".join(cand_tech_list)

    # 3. CERTIFICATIONS (COMMA STRING)
    # k may be 0, yielding "" — mapped to None in the payload below.
    cand_certs = ", ".join(random.sample(CERTIFICATIONS, k=random.randint(0, 2)))

    # 4. EDUCATION
    edu = generate_education()

    # 5. EXPERIENCE
    exp = generate_work_experience(role)

    # 6. PROJECTS
    # Projects reuse the candidate's own tech list so stacks stay coherent.
    proj = generate_projects(cand_tech_list)

    # 7. SUMMARY
    summary = f"{role} with {random.randint(1, 10)} years of experience. Proficient in {cand_tech_list[0]}, {cand_tech_list[1]}, and {cand_tech_list[2]}. Known for {cand_soft_skills[0].lower()} and {cand_soft_skills[1].lower()}. Dedicated to {fake.catch_phrase().lower()}."

    payload = {
        "id": user_id,
        "resume_url": f"{user_id}/resume.pdf",
        "file_hash": fake.sha256(),
        "processed": True,
        # Literal "now()" string — presumably resolved server-side by the
        # database on insert; TODO confirm against the ingest path.
        "updated_at": "now()",
        "full_name": fake.name(),
        "summary": summary,
        "phone": fake.phone_number(),
        "email": fake.email(),
        "skills": cand_soft_skills,  # Note: Soft skills as List
        "technical_skills": cand_tech_string,  # Note: Tech skills as string representation (matching actual IRIS DB ingest logic)
        "education": edu,
        "work_experience": exp,
        "projects": proj,
        "certifications": cand_certs if cand_certs else None
    }

    return payload
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def generate_dataset(num_records=250):
    """Build `num_records` synthetic candidates and dump them as a JSON array."""
    print(f"🚀 Generating highly realistic dataset with {num_records} candidates...")

    records = [build_candidate() for _ in range(num_records)]

    out_file = "realistic_synthetic_resumes.json"
    with open(out_file, "w", encoding="utf-8") as fh:
        json.dump(records, fh, indent=4)

    print(f"✅ Successfully wrote {num_records} real-format JSON objects to '{out_file}'!")
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
if __name__ == "__main__":
    # Default batch size: 250 candidates written to
    # realistic_synthetic_resumes.json (see generate_dataset).
    generate_dataset(250)
|
backend/inspect_columns.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
import os
from supabase import create_client
from dotenv import load_dotenv

load_dotenv()

SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
client = create_client(SUPABASE_URL, SUPABASE_KEY)

async def inspect():
    """Print the sorted column names of one sample row per embeddings table."""
    print("--- Profile Embeddings Columns ---")
    p_res = client.table("profile_embeddings").select("*").limit(1).execute()
    if not p_res.data:
        print("No profile embeddings found")
    else:
        for col in sorted(p_res.data[0]):
            print(f" - {col}")

    print("\n--- Job Embeddings Columns ---")
    j_res = client.table("job_embeddings").select("*").limit(1).execute()
    if not j_res.data:
        print("No job embeddings found")
    else:
        for col in sorted(j_res.data[0]):
            print(f" - {col}")

if __name__ == "__main__":
    asyncio.run(inspect())
|
backend/inspect_schema.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
import os
from supabase import create_client
from dotenv import load_dotenv

load_dotenv()

SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
client = create_client(SUPABASE_URL, SUPABASE_KEY)

async def inspect():
    """Print a comma-separated column list from one sample row per table."""
    print("--- Profile Embeddings Sample ---")
    p_res = client.table("profile_embeddings").select("*").limit(1).execute()
    print(", ".join(p_res.data[0].keys()) if p_res.data else "No profile embeddings found")

    print("\n--- Job Embeddings Sample ---")
    j_res = client.table("job_embeddings").select("*").limit(1).execute()
    print(", ".join(j_res.data[0].keys()) if j_res.data else "No job embeddings found")

if __name__ == "__main__":
    asyncio.run(inspect())
|
backend/inspect_schema_fixed.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
import os
import json
from supabase import create_client
from dotenv import load_dotenv

load_dotenv()

SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
client = create_client(SUPABASE_URL, SUPABASE_KEY)

def _write_columns(f, table_name):
    """Write the sorted column names of `table_name`'s first row to `f`."""
    res = client.table(table_name).select("*").limit(1).execute()
    if res.data:
        for c in sorted(res.data[0].keys()):
            f.write(f"- {c}\n")
    else:
        f.write("No data\n")

async def inspect():
    """Dump both embeddings tables' column lists to schema_dump.txt."""
    # FIX: pin UTF-8 explicitly — without `encoding=`, open() falls back to
    # the locale codepage (cp1252 on Windows), which raises
    # UnicodeEncodeError on non-ASCII content.
    with open("schema_dump.txt", "w", encoding="utf-8") as f:
        f.write("--- Profile Embeddings ---\n")
        _write_columns(f, "profile_embeddings")

        f.write("\n--- Job Embeddings ---\n")
        _write_columns(f, "job_embeddings")

if __name__ == "__main__":
    asyncio.run(inspect())
|
backend/out_cmd.txt
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Traceback (most recent call last):
|
| 2 |
+
File "C:\Users\sandr\IRIS2026\IRIS_FULL\backend\recalculate_scores.py", line 90, in <module>
|
| 3 |
+
asyncio.run(main())
|
| 4 |
+
~~~~~~~~~~~^^^^^^^^
|
| 5 |
+
File "C:\Users\sandr\AppData\Local\Programs\Python\Python313\Lib\asyncio\runners.py", line 195, in run
|
| 6 |
+
return runner.run(main)
|
| 7 |
+
~~~~~~~~~~^^^^^^
|
| 8 |
+
File "C:\Users\sandr\AppData\Local\Programs\Python\Python313\Lib\asyncio\runners.py", line 118, in run
|
| 9 |
+
return self._loop.run_until_complete(task)
|
| 10 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^
|
| 11 |
+
File "C:\Users\sandr\AppData\Local\Programs\Python\Python313\Lib\asyncio\base_events.py", line 725, in run_until_complete
|
| 12 |
+
return future.result()
|
| 13 |
+
~~~~~~~~~~~~~^^
|
| 14 |
+
File "C:\Users\sandr\IRIS2026\IRIS_FULL\backend\recalculate_scores.py", line 23, in main
|
| 15 |
+
print("\U0001f50d Fetching all applications from Supabase...")
|
| 16 |
+
~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 17 |
+
File "C:\Users\sandr\AppData\Local\Programs\Python\Python313\Lib\encodings\cp1252.py", line 19, in encode
|
| 18 |
+
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
| 19 |
+
~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 20 |
+
UnicodeEncodeError: 'charmap' codec can't encode character '\U0001f50d' in position 0: character maps to <undefined>
|
backend/realistic_synthetic_resumes.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
backend/remove_triggers_for_profile_embeddings.sql
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-- remove_triggers_for_profile_embeddings.sql
-- Run this in your Supabase SQL Editor to completely disable the triggers
-- causing the "embedding generation failed" error.
-- NOTE(review): CASCADE on the function drops also removes any other
-- objects that depend on these functions — confirm nothing else uses them
-- before running in production.

-- 1. Drop the trigger that refreshes recommendations
DROP TRIGGER IF EXISTS on_profile_embedding_change ON public.profile_embeddings;

-- 2. Drop the redundant webhook trigger
DROP TRIGGER IF EXISTS on_profile_embedding_upsert ON public.profile_embeddings;

-- 3. Drop the function that refreshes recommendations
DROP FUNCTION IF EXISTS public.trg_refresh_recommendations_for_user CASCADE;

-- 4. Drop the function for the webhook trigger
DROP FUNCTION IF EXISTS public.trg_on_profile_embedding_update CASCADE;

-- Now the Python upsert:
-- client.table("profile_embeddings").upsert(payload).execute()
-- will run purely as a database insert without any hidden functions interrupting it.
|
backend/repair_system_mismatches.sql
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-- repair_system_mismatches.sql
-- Run this in Supabase SQL Editor to resolve the "j_emb" error and restore automatic matching.

-- 1. FIX THE MATCHING FUNCTION (The "j_emb" bug fix)
-- This function is used by triggers and the RPC.
-- We ENSURE it uses 'work_experience' for jobs and 'experience' for profiles.
--
-- Contract: returns a JSON object with per-facet 0-100 scores and a
-- weighted overall 'match_score' (weights: technical 0.35, experience
-- 0.20, projects 0.15, soft skills 0.10, education 0.10, certs 0.10).
-- Missing embeddings for a facet score as 0 rather than erroring.
CREATE OR REPLACE FUNCTION public.match_profile_job(p_id uuid, j_id uuid)
RETURNS json
LANGUAGE plpgsql
AS $function$
DECLARE
    p_rec record;
    j_rec record; -- Consistency check: Job record MUST use its real columns
    s_sim float := 0; t_sim float := 0; exp_sim float := 0;
    edu_sim float := 0; cert_sim float := 0; proj_sim float := 0;
    s_score int := 0; t_score int := 0; e_score int := 0;
    ed_score int := 0; c_score int := 0; p_score int := 0;
BEGIN
    -- Fetch Profile Embeddings
    SELECT * INTO p_rec FROM public.profile_embeddings WHERE id = p_id;
    IF NOT FOUND THEN RETURN json_build_object('error', 'Profile embeddings not found'); END IF;

    -- Fetch Job Embeddings
    SELECT * INTO j_rec FROM public.job_embeddings WHERE job_id = j_id;
    IF NOT FOUND THEN RETURN json_build_object('error', 'Job embeddings not found'); END IF;

    -- Similarity with Cosine Distance (<=>); 1 - distance = similarity.
    -- nullif(..., 'NaN') guards against NaN from degenerate vectors.
    IF p_rec.skills IS NOT NULL AND j_rec.skills IS NOT NULL THEN
        s_sim := coalesce(nullif(1 - (p_rec.skills <=> j_rec.skills), 'NaN'), 0);
    END IF;

    IF p_rec.technical_skills IS NOT NULL AND j_rec.technical_skills IS NOT NULL THEN
        t_sim := coalesce(nullif(1 - (p_rec.technical_skills <=> j_rec.technical_skills), 'NaN'), 0);
    END IF;

    -- FIX: Profile column is 'experience', Job column is 'work_experience'
    IF p_rec.experience IS NOT NULL AND j_rec.work_experience IS NOT NULL THEN
        exp_sim := coalesce(nullif(1 - (p_rec.experience <=> j_rec.work_experience), 'NaN'), 0);
    END IF;

    IF p_rec.education IS NOT NULL AND j_rec.education IS NOT NULL THEN
        edu_sim := coalesce(nullif(1 - (p_rec.education <=> j_rec.education), 'NaN'), 0);
    END IF;

    -- Certifications have no dedicated job-side embedding: compare against
    -- the job's technical skills (falling back to soft skills).
    IF p_rec.certifications IS NOT NULL THEN
        cert_sim := coalesce(nullif(1 - (p_rec.certifications <=> coalesce(j_rec.technical_skills, j_rec.skills)), 'NaN'), 0);
    END IF;

    -- Projects are likewise scored against the job's technical skills.
    IF p_rec.projects IS NOT NULL AND j_rec.technical_skills IS NOT NULL THEN
        proj_sim := coalesce(nullif(1 - (p_rec.projects <=> j_rec.technical_skills), 'NaN'), 0);
    END IF;

    -- Scaling to 0-100 (clamped to [0, 1] first)
    s_score := (greatest(0, least(1, s_sim)) * 100)::int;
    t_score := (greatest(0, least(1, t_sim)) * 100)::int;
    e_score := (greatest(0, least(1, exp_sim)) * 100)::int;
    ed_score := (greatest(0, least(1, edu_sim)) * 100)::int;
    c_score := (greatest(0, least(1, cert_sim)) * 100)::int;
    p_score := (greatest(0, least(1, proj_sim)) * 100)::int;

    RETURN json_build_object(
        'match_score', ((t_score * 0.35) + (e_score * 0.20) + (p_score * 0.15) + (s_score * 0.10) + (ed_score * 0.10) + (c_score * 0.10))::int,
        'skills_match', s_score,
        'technical_skills_match', t_score,
        'work_experience_match', e_score,
        'education_match', ed_score,
        'certifications_match', c_score,
        'project_match', p_score
    );
END;
$function$;
|
| 72 |
+
|
| 73 |
+
-- 2. CREATE THE JOB RECOMMENDATIONS RPC (Ranked Jobs)
-- We drop it first because changing the return schema requires it in Postgres.
DROP FUNCTION IF EXISTS public.get_job_recommendations(uuid, int);

-- Returns the top `p_limit` Active jobs for `p_user_id`, ranked by the
-- match_profile_job() score, as a JSON array ('[]' when none exist).
CREATE OR REPLACE FUNCTION public.get_job_recommendations(p_user_id uuid, p_limit int DEFAULT 10)
RETURNS json
LANGUAGE plpgsql
AS $function$
DECLARE
    results_json JSON;
BEGIN
    -- NOTE(review): match_profile_job() is invoked once per active job row,
    -- so this scales linearly with the jobs table; consider caching scores
    -- if the active-job count grows large.
    SELECT json_agg(r) INTO results_json
    FROM (
        SELECT
            j.id,
            j.title,
            j.location,
            j.job_type,
            j.salary_range,
            c.name as company_name,
            c.logo_url as company_logo,
            (match_profile_job(p_user_id, j.id)->>'match_score')::int as match_score
        FROM public.jobs j
        JOIN public.companies c ON j.company_id = c.id
        WHERE j.status = 'Active'
        ORDER BY match_score DESC
        LIMIT p_limit
    ) r;

    -- Return an empty array rather than SQL NULL when nothing matched.
    RETURN coalesce(results_json, '[]'::json);
END;
$function$;
|
backend/requirements.txt
CHANGED
|
@@ -26,3 +26,4 @@ fastapi>=0.109.0
|
|
| 26 |
uvicorn>=0.27.0
|
| 27 |
python-multipart>=0.0.9
|
| 28 |
google-genai>=0.2.0
|
|
|
|
|
|
| 26 |
uvicorn>=0.27.0
|
| 27 |
python-multipart>=0.0.9
|
| 28 |
google-genai>=0.2.0
|
| 29 |
+
scikit-learn>=1.3.0
|
backend/script_output.txt
ADDED
|
File without changes
|
backend/src/embeddings/benchmark_bge.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
import torch
import numpy as np
from sentence_transformers import SentenceTransformer
import psutil
import os

def benchmark_bge():
    """Benchmark BGE-M3 locally: model load time, resident memory, and
    encode latency/throughput across several batch sizes (GPU if available).

    Prints a table of total time, seconds per sentence, and sentences/sec
    for each batch size over a fixed 100-sentence workload.
    """
    print("🚀 Starting BGE-M3 Efficiency Benchmark...")

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"💻 Device: {device}")

    print("📥 Loading BAAI/bge-m3...")
    start_load = time.time()
    model = SentenceTransformer('BAAI/bge-m3', device=device)
    print(f"⏱️ Load Time: {time.time() - start_load:.2f}s")

    # RSS of this process after model load (includes model weights on CPU).
    process = psutil.Process(os.getpid())
    mem_info = process.memory_info()
    print(f"📊 Memory Usage (RAM): {mem_info.rss / 1024 / 1024:.2f} MB")

    sentences = [
        "The quick brown fox jumps over the lazy dog.",
        "Artificial intelligence is transforming the recruitment industry.",
        "Candidate has 5 years of experience in Python and FastAPI.",
        "Looking for a Senior Software Engineer with cloud expertise."
    ] * 25  # 100 sentences

    batch_sizes = [1, 4, 8, 16, 32]

    print("\n--- Latency vs Batch Size ---")
    print(f"{'Batch Size':<12} | {'Time (s)':<10} | {'Sec/Sent':<10} | {'Throughput (sent/s)':<20}")
    print("-" * 65)

    for bs in batch_sizes:
        # Warmup pass (untimed) so caches/lazy init don't skew the numbers.
        # FIX: removed a dead `start_time = time.time()` that preceded the
        # warmup and was immediately overwritten below.
        model.encode(sentences[:bs], batch_size=bs, show_progress_bar=False)

        # Actual benchmark
        start_time = time.time()
        model.encode(sentences, batch_size=bs, show_progress_bar=False)
        end_time = time.time()

        total_time = end_time - start_time
        sec_per_sent = total_time / len(sentences)
        throughput = len(sentences) / total_time

        print(f"{bs:<12} | {total_time:<10.3f} | {sec_per_sent:<10.4f} | {throughput:<20.2f}")

    print("\n✅ Benchmark Complete.")

if __name__ == "__main__":
    benchmark_bge()
|
backend/src/embeddings/evaluate_quality.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
import time
|
| 4 |
+
import json
|
| 5 |
+
import random
|
| 6 |
+
import numpy as np
|
| 7 |
+
|
| 8 |
+
# Set encoding for Windows terminals
|
| 9 |
+
if sys.platform == "win32":
|
| 10 |
+
import io
|
| 11 |
+
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
| 12 |
+
|
| 13 |
+
# Add backend to path
|
| 14 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../..')))
|
| 15 |
+
|
| 16 |
+
from backend.src.embeddings.local_embedder import generate_embedding
|
| 17 |
+
|
| 18 |
+
def cosine_similarity(v1, v2):
    """Cosine similarity of two 1-D vectors.

    Args:
        v1, v2: Array-likes of equal length.

    Returns:
        float in [-1, 1]; 0.0 when either vector has zero norm (the
        unguarded formula would divide by zero and yield NaN).
    """
    denom = np.linalg.norm(v1) * np.linalg.norm(v2)
    if denom == 0:
        return 0.0
    return np.dot(v1, v2) / denom
|
| 20 |
+
|
| 21 |
+
def inject_noise(text, is_skill=False):
    """Simulates real-world messy resumes with abbreviations, typos, and lowercasing.

    Args:
        text: Clean input string (a skill name or a sentence).
        is_skill: When True, known skill names have a 50% chance of being
                  swapped for a common abbreviation (e.g. "Kubernetes" -> "K8s").

    Returns:
        The possibly-perturbed string; ~30% of calls return the input
        untouched.
    """
    if random.random() < 0.3:  # 30% chance to leave perfectly clean
        return text

    abbreviations = {
        "Python": "Py", "PostgreSQL": "Postgres", "JavaScript": "JS",
        "React": "ReactJS", "Machine Learning": "ML", "Amazon Web Services": "AWS",
        "Kubernetes": "K8s", "TypeScript": "TS", "User Experience": "UX"
    }

    if is_skill and text in abbreviations and random.random() > 0.5:
        return abbreviations[text]

    # Randomly lowercase everything (common in lazy resumes)
    if random.random() > 0.7:
        text = text.lower()

    return text
|
| 40 |
+
|
| 41 |
+
def generate_adversarial_dataset():
    """Generates 200 candidates with intentional distractors and noise.

    For each of 40 (domain, level) pairs it emits five candidates:
    the golden target, a seniority distractor (right skills, wrong level),
    a skill distractor (right level, core skill replaced by C++), and two
    random-noise fillers — 40 * 5 = 200 total.

    Returns:
        tuple: (candidates, golden_dataset) where candidates is a list of
        profile dicts and golden_dataset maps each HR-style query string to
        its single relevant candidate id.
    """
    print("Building N=200 Adversarial Candidate Pool...")

    domains = [
        ("Frontend_React", ["React", "JavaScript", "Tailwind", "CSS", "TypeScript"]),
        ("Frontend_Angular", ["Angular", "JavaScript", "SCSS", "HTML", "TypeScript"]),
        ("Backend_Python", ["Python", "FastAPI", "PostgreSQL", "Docker", "Linux"]),
        ("Backend_Java", ["Java", "Spring Boot", "MySQL", "Kafka", "Kubernetes"]),
        ("Data_Science", ["Python", "Pandas", "PyTorch", "SQL", "Machine Learning"]),
        ("Data_Engineer", ["Spark", "Airflow", "Python", "SQL", "AWS"]),
        ("DevOps", ["Kubernetes", "Docker", "Terraform", "CI/CD", "AWS"]),
        ("Mobile_iOS", ["Swift", "Objective-C", "iOS", "XCode", "CoreData"]),
        ("Mobile_Android", ["Kotlin", "Java", "Android Studio", "Jetpack", "Firebase"]),
        ("Cybersecurity", ["Network Security", "Penetration Testing", "Firewalls", "Linux", "Python"])
    ]
    levels = ["Junior", "Mid-Level", "Senior", "Lead"]

    candidates = []
    golden_dataset = []

    cand_counter = 1

    # Generate 40 Queries (10 domains x 4 levels)
    for domain_name, base_skills in domains:
        for level in levels:
            # 1. The Target Candidate (Golden)
            target_id = f"cand_{cand_counter}_TARGET_{level}_{domain_name}"
            target_skills = [inject_noise(s, True) for s in base_skills]
            candidates.append({
                "id": target_id,
                "headline": f"{level} {domain_name.replace('_', ' ')} Engineer",
                "summary": inject_noise(f"Experienced {level} professional in {domain_name}. Passionate about building scalable architectures."),
                "skills": target_skills,
                "experience": [inject_noise(f"Built systems using {target_skills[0]} and {target_skills[1]}.")]
            })
            cand_counter += 1

            # The Query (Clean, formal HR language)
            query = f"Hiring a {level} professional in {domain_name.replace('_', ' ')}. Must have strong experience with {base_skills[0]}, {base_skills[1]}, and {base_skills[2]}."
            golden_dataset.append({"query": query, "relevant_id": target_id})

            # 2. Seniority Distractor (Wrong level, perfect skills)
            distractor_level = "Senior" if level == "Junior" else "Junior"
            candidates.append({
                "id": f"cand_{cand_counter}_DISTRACTOR_LEVEL_{domain_name}",
                "headline": f"{distractor_level} {domain_name.replace('_', ' ')} Engineer",
                "summary": f"A {distractor_level} developer specializing in {domain_name}.",
                "skills": base_skills, # Same exact skills to confuse the model
                "experience": [f"Worked extensively with {base_skills[0]}."]
            })
            cand_counter += 1

            # 3. Skill Distractor (Right level, missing core skill, has similar skill)
            altered_skills = base_skills.copy()
            altered_skills[0] = "C++" # Replace core skill with something irrelevant
            candidates.append({
                "id": f"cand_{cand_counter}_DISTRACTOR_SKILL_{domain_name}",
                "headline": f"{level} Software Engineer",
                "summary": f"Focuses on {altered_skills[0]} and backend architecture.",
                "skills": altered_skills,
                "experience": [f"Maintained legacy {altered_skills[0]} codebases."]
            })
            cand_counter += 1

            # 4 & 5. Random Noise Candidates (Fill out the 200)
            for _ in range(2):
                rand_domain = random.choice(domains)
                candidates.append({
                    "id": f"cand_{cand_counter}_RANDOM",
                    "headline": f"{random.choice(levels)} {rand_domain[0]} Dev",
                    "summary": "Looking for new opportunities. Hobbies: hiking, dog walking, photography.",
                    "skills": [inject_noise(s, True) for s in rand_domain[1]],
                    "experience": ["General software development tasks."]
                })
                cand_counter += 1

    return candidates, golden_dataset
|
| 119 |
+
|
| 120 |
+
def evaluate_adversarial():
    """Run the adversarial retrieval benchmark end to end.

    Embeds all 200 synthetic candidates, ranks them against each of the
    40 golden queries by cosine similarity, and reports MRR plus
    Recall@1/3/5. Results (minus Recall@5 — computed but not persisted;
    presumably an oversight, TODO confirm) are written to
    quality_metrics_adversarial.json.
    """
    print("🚀 Starting Adversarial Robustness Evaluation...")

    candidates, golden_dataset = generate_adversarial_dataset()

    print(f"📊 Dataset: {len(golden_dataset)} Queries | {len(candidates)} Candidates")
    print("⚠️ Warning: Embedding 200 profiles on CPU will take time. Please wait...\n")

    # 1. Embed Candidates (Flattening)
    # Each candidate is flattened into one rich text string before embedding.
    candidate_embeddings = []
    start_time = time.time()

    for i, c in enumerate(candidates):
        rich_text = f"Headline: {c['headline']}. Summary: {c['summary']} Skills: {', '.join(c['skills'])}. Experience: {' '.join(c['experience'])}"
        candidate_embeddings.append({
            "id": c["id"],
            "vec": generate_embedding(rich_text)
        })
        if (i+1) % 20 == 0:
            print(f" -> Embedded {i+1}/200 candidates...")

    print(f"✅ Embedding complete in {time.time() - start_time:.2f} seconds.\n")

    # 2. Evaluate Queries
    mrr_total = 0
    hits_at_1 = 0
    hits_at_3 = 0
    hits_at_5 = 0

    for item in golden_dataset:
        query_vec = generate_embedding(item["query"])
        target_id = item["relevant_id"]

        # Rank all candidates by similarity to the query, best first.
        scores = [(c_emb["id"], cosine_similarity(query_vec, c_emb["vec"])) for c_emb in candidate_embeddings]
        scores.sort(key=lambda x: x[1], reverse=True)

        # 1-based rank of the golden candidate (-1 if somehow absent).
        rank = -1
        for idx, (cid, sim) in enumerate(scores):
            if cid == target_id:
                rank = idx + 1
                break

        if rank != -1:
            mrr_total += (1.0 / rank)
            if rank == 1: hits_at_1 += 1
            if rank <= 3: hits_at_3 += 1
            if rank <= 5: hits_at_5 += 1

    # 3. Final Aggregation
    num_queries = len(golden_dataset)
    final_mrr = mrr_total / num_queries
    recall_1 = hits_at_1 / num_queries
    recall_3 = hits_at_3 / num_queries
    recall_5 = hits_at_5 / num_queries

    print("="*45)
    print("🛡️ ADVERSARIAL RETRIEVAL METRICS (N=200)")
    print("="*45)
    print(f"MRR (Mean Reciprocal Rank): {final_mrr:.4f}")
    print("-" * 45)
    print(f"Recall@1 (R@1): {recall_1*100:.1f}%")
    print(f"Recall@3 (R@3): {recall_3*100:.1f}%")
    print(f"Recall@5 (R@5): {recall_5*100:.1f}%")
    print("="*45)

    # Save to JSON for the guide/paper
    with open("quality_metrics_adversarial.json", "w") as f:
        json.dump({
            "dataset": "N=200 Adversarial (Noise + Distractors)",
            "mrr": final_mrr,
            "recall_1": recall_1,
            "recall_3": recall_3
        }, f, indent=4)

    print("📄 Results securely saved to 'quality_metrics_adversarial.json'")

if __name__ == "__main__":
    evaluate_adversarial()
|
backend/src/embeddings/job_embed.py
CHANGED
|
@@ -92,7 +92,7 @@ def safe_generate_and_store_job_embeddings(client, job_id: str) -> None:
|
|
| 92 |
"skills": generate_list_embedding(skills),
|
| 93 |
"technical_skills": generate_list_embedding(technical_skills),
|
| 94 |
"tools": generate_list_embedding(tools),
|
| 95 |
-
"
|
| 96 |
"education": generate_embedding(education),
|
| 97 |
"certifications": generate_list_embedding(certifications),
|
| 98 |
"updated_at": "now()"
|
|
|
|
| 92 |
"skills": generate_list_embedding(skills),
|
| 93 |
"technical_skills": generate_list_embedding(technical_skills),
|
| 94 |
"tools": generate_list_embedding(tools),
|
| 95 |
+
"work_experience": generate_embedding(experience),
|
| 96 |
"education": generate_embedding(education),
|
| 97 |
"certifications": generate_list_embedding(certifications),
|
| 98 |
"updated_at": "now()"
|
backend/src/embeddings/match_benchmark_granular.py
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
import time
|
| 4 |
+
import json
|
| 5 |
+
import random
|
| 6 |
+
import numpy as np
|
| 7 |
+
import torch
|
| 8 |
+
from sentence_transformers import SentenceTransformer
|
| 9 |
+
|
| 10 |
+
# Set encoding for Windows terminals
|
| 11 |
+
# Removing potentially problematic wrapper for background logging
|
| 12 |
+
# if sys.platform == "win32":
|
| 13 |
+
# import io
|
| 14 |
+
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
| 15 |
+
|
| 16 |
+
# Add backend to path
|
| 17 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../..')))
|
| 18 |
+
|
| 19 |
+
# ---------------------------------------------------------------------
|
| 20 |
+
# UTILS & NOISE SIMULATION
|
| 21 |
+
# ---------------------------------------------------------------------
|
| 22 |
+
|
| 23 |
+
def cosine_similarity(v1, v2):
    """Return the cosine similarity of two vectors.

    Yields 0.0 for any degenerate input (either vector missing or of zero
    magnitude) so callers never need to special-case absent embeddings.
    """
    if v1 is None or v2 is None:
        return 0.0
    magnitude_a = np.linalg.norm(v1)
    magnitude_b = np.linalg.norm(v2)
    if magnitude_a == 0 or magnitude_b == 0:
        return 0.0
    return np.dot(v1, v2) / (magnitude_a * magnitude_b)
|
| 29 |
+
|
| 30 |
+
def jaccard_similarity(list1, list2):
    """Jaccard index of two skill lists, compared case-insensitively.

    Each item is stringified, lowercased and stripped before the set
    comparison; returns 0.0 when either side normalises to an empty set.
    """
    left = {str(item).lower().strip() for item in list1}
    right = {str(item).lower().strip() for item in list2}
    if not left or not right:
        return 0.0
    return len(left & right) / len(left | right)
|
| 35 |
+
|
| 36 |
+
def inject_real_world_noise(text, is_skill=False):
|
| 37 |
+
"""Simulates typos, abbreviations, and informal language."""
|
| 38 |
+
if random.random() < 0.2: return text # 20% keep clean
|
| 39 |
+
|
| 40 |
+
abbrev = {
|
| 41 |
+
"Python": "Py", "PostgreSQL": "Postgres", "JavaScript": "JS",
|
| 42 |
+
"React": "ReactJS", "Machine Learning": "ML", "Kubernetes": "K8s",
|
| 43 |
+
"TypeScript": "TS", "Amazon Web Services": "AWS", "Google Cloud": "GCP"
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
# Apply abbreviation
|
| 47 |
+
if is_skill and text in abbrev and random.random() > 0.4:
|
| 48 |
+
return abbrev[text]
|
| 49 |
+
|
| 50 |
+
# Inject "Messy" Resume fillers
|
| 51 |
+
fillers = ["Highly skilled in", "Practical knowledge of", "Working with", "Extensive experience in"]
|
| 52 |
+
if random.random() > 0.7 and not is_skill:
|
| 53 |
+
text = f"{random.choice(fillers)} {text}"
|
| 54 |
+
|
| 55 |
+
# Random case noise
|
| 56 |
+
if random.random() > 0.8:
|
| 57 |
+
text = text.lower()
|
| 58 |
+
|
| 59 |
+
return text
|
| 60 |
+
|
| 61 |
+
# ---------------------------------------------------------------------
|
| 62 |
+
# DATASET GENERATION
|
| 63 |
+
# ---------------------------------------------------------------------
|
| 64 |
+
|
| 65 |
+
def generate_bench_dataset(num_candidates=100):
    """Build paired (candidate, job-description) records for retrieval benchmarking.

    Each candidate carries noise-injected skill text (typos/abbreviations/casing
    via inject_real_world_noise) while its query is the clean, formal JD whose
    single relevant result is that candidate. Domains rotate round-robin so the
    dataset stays balanced across the four roles.

    NOTE(review): output depends on the module-level `random` state — seed
    before calling if reproducibility matters.

    Returns:
        (candidates, queries): two parallel lists of dicts; every query holds
        the 'relevant_id' of its matching candidate plus a structured JD copy.
    """
    print(f"🛠️ Generating N={num_candidates} Real-World Synthetic Dataset...")

    # (domain name, canonical skills, representative certifications)
    domains = [
        ("Cloud_Architect", ["AWS", "Terraform", "Kubernetes", "Docker"], ["Solutions Associate", "AWS Architect"]),
        ("Backend_Dev", ["Python", "FastAPI", "PostgreSQL", "Redis"], ["Python Cert", "FastAPI Expert"]),
        ("Frontend_Dev", ["React", "TypeScript", "Tailwind", "Next.js"], ["Meta React Cert", "JS Expert"]),
        ("Data_Science", ["Python", "PyTorch", "SQL", "Pandas"], ["TensorFlow Cert", "Data Pro"]),
    ]

    candidates = []
    queries = []  # JDs

    # We generate balanced pairs
    for i in range(num_candidates):
        domain_name, skills, certs = domains[i % len(domains)]
        level = random.choice(["Junior", "Senior", "Lead"])

        # 1. The Candidate Data (noisy, resume-like side of the pair)
        cand_id = f"cand_{i}_{domain_name}"
        noisy_skills = [inject_real_world_noise(s, True) for s in skills]

        candidates.append({
            "id": cand_id,
            "skills": noisy_skills,
            "tech_skills": noisy_skills,  # Project uses both
            "experience": [f"Developed {domain_name} solutions at Tech {i}."],
            # ~50% of candidates carry their domain's first certification
            "certifications": [certs[0]] if random.random() > 0.5 else [],
            "full_text": f"{level} {domain_name}. Skills: {', '.join(noisy_skills)}"
        })

        # 2. The Matching Query (JD) - Formal Clean Version
        jd_text = f"We are looking for a {level} {domain_name.replace('_', ' ')}. Must have expertise in {skills[0]}, {skills[1]}, and {skills[2]}."
        queries.append({
            "query": jd_text,
            "relevant_id": cand_id,
            "jd_structured": {
                "skills": skills,
                "tech_skills": skills,
                "experience": [f"{level} {domain_name} experience."],
                "certifications": certs
            }
        })

    return candidates, queries
|
| 110 |
+
|
| 111 |
+
# ---------------------------------------------------------------------
|
| 112 |
+
# BENCHMARK RUNNER
|
| 113 |
+
# ---------------------------------------------------------------------
|
| 114 |
+
|
| 115 |
+
def run_benchmark():
    """Compare four matching strategies on an adversarial synthetic dataset.

    Strategies: Jaccard skill overlap (baseline), flattened MiniLM ('BERT'),
    flattened BGE-m3, and the project's granular weighted BGE method. For each
    of 250 JD queries, all candidates are scored, ranked, and MRR / Recall@1 /
    Recall@3 accumulated per method. Results are printed as a table and saved
    to 'match_benchmark_results.json'; the raw dataset is dumped to
    'synthetic_dataset_adversarial.json' for inspection.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"🚀 Loading Models on {device}...", flush=True)

    # Load Models
    bert_model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
    bge_model = SentenceTransformer('BAAI/bge-m3', device=device)

    candidates, queries = generate_bench_dataset(250)

    # Save the synthetic dataset to a JSON file for inspection
    with open("synthetic_dataset_adversarial.json", "w", encoding="utf-8") as f:
        json.dump({"candidates": candidates, "queries": queries}, f, indent=4)
    print(f"💾 Saved generated synthetic dataset to 'synthetic_dataset_adversarial.json'", flush=True)

    # Pre-calculate Candidate Embeddings (one-time indexing pass)
    print("🧠 Indexing Candidates...")
    start_idx = time.time()
    for i, c in enumerate(candidates):
        # BERT Flattened
        c["bert_vec"] = bert_model.encode(c["full_text"])
        # BGE Flattened
        c["bge_flat_vec"] = bge_model.encode(c["full_text"])
        # BGE Granular (Project Method): one vector per entity field.
        # NOTE(review): np.zeros(1024) stands in for missing certs — assumes
        # bge-m3 emits 1024-dim vectors; confirm if the model is swapped.
        c["bge_granular"] = {
            "skills": bge_model.encode(" ".join(c["skills"])),
            "tech_skills": bge_model.encode(" ".join(c["tech_skills"])),
            "experience": bge_model.encode(" ".join(c["experience"])),
            "certs": bge_model.encode(" ".join(c["certifications"])) if c["certifications"] else np.zeros(1024)
        }
        if (i+1) % 50 == 0:
            print(f" -> Indexed {i+1}/{len(candidates)} candidates...", flush=True)
    print(f"✅ Indexed in {time.time() - start_idx:.2f}s")

    # Evaluation Loops
    methods = ["Jaccard_Baseline", "BERT_Flattened", "BGE_Flattened", "BGE_Granular_Weighted"]
    results = {m: {"mrr": 0, "r1": 0, "r3": 0} for m in methods}

    # Granular method weights (mirror the production scoring weights).
    weights = {"skills": 0.35, "tech_skills": 0.35, "experience": 0.20, "certs": 0.10}

    print("\nEvaluating Queries...")
    for i, q in enumerate(queries):
        target_id = q["relevant_id"]
        jd_text = q["query"]
        jd_s = q["jd_structured"]

        # Embed Query (once per query, reused against every candidate)
        q_bert = bert_model.encode(jd_text)
        q_bge_flat = bge_model.encode(jd_text)
        q_bge_g = {
            "skills": bge_model.encode(" ".join(jd_s["skills"])),
            "tech_skills": bge_model.encode(" ".join(jd_s["tech_skills"])),
            "experience": bge_model.encode(" ".join(jd_s["experience"])),
            "certs": bge_model.encode(" ".join(jd_s["certifications"]))
        }

        if (i+1) % 25 == 0:
            print(f" -> Evaluated {i+1}/{len(queries)} queries...", flush=True)

        # Calculate scores for all candidates
        cand_scores = []
        for c in candidates:
            # 1. Jaccard
            jac = jaccard_similarity(jd_s["skills"], c["skills"])
            # 2. BERT
            ber = cosine_similarity(q_bert, c["bert_vec"])
            # 3. BGE Flat
            bgf = cosine_similarity(q_bge_flat, c["bge_flat_vec"])
            # 4. BGE Granular Weighted (field-wise cosine, weighted sum)
            bgg = (
                cosine_similarity(q_bge_g["skills"], c["bge_granular"]["skills"]) * weights["skills"] +
                cosine_similarity(q_bge_g["tech_skills"], c["bge_granular"]["tech_skills"]) * weights["tech_skills"] +
                cosine_similarity(q_bge_g["experience"], c["bge_granular"]["experience"]) * weights["experience"] +
                cosine_similarity(q_bge_g["certs"], c["bge_granular"]["certs"]) * weights["certs"]
            )

            cand_scores.append({
                "id": c["id"],
                "Jaccard_Baseline": jac,
                "BERT_Flattened": ber,
                "BGE_Flattened": bgf,
                "BGE_Granular_Weighted": bgg
            })

        # Rank and Calc Metrics
        for m in methods:
            sorted_cands = sorted(cand_scores, key=lambda x: x[m], reverse=True)
            # The generator's `i` shadows the outer loop variable, but genexp
            # scoping in Python 3 keeps the outer `i` intact after this line.
            rank = next(i for i, x in enumerate(sorted_cands) if x["id"] == target_id) + 1

            results[m]["mrr"] += (1.0 / rank)
            if rank == 1: results[m]["r1"] += 1
            if rank <= 3: results[m]["r3"] += 1

    # Print Results Table
    num_q = len(queries)
    print("\n" + "="*65)
    print(f"{'Method':<25} | {'MRR':<8} | {'Recall@1':<10} | {'Recall@3':<10}")
    print("-" * 65)

    for m in methods:
        mrr = results[m]["mrr"] / num_q
        r1 = (results[m]["r1"] / num_q) * 100
        r3 = (results[m]["r3"] / num_q) * 100
        print(f"{m:<25} | {mrr:.4f} | {r1:>8.1f}% | {r3:>8.1f}%", flush=True)
    print("="*65, flush=True)

    # Save to file
    summary = {m: {"mrr": results[m]["mrr"]/num_q, "r1": results[m]["r1"]/num_q, "r3": results[m]["r3"]/num_q} for m in methods}
    with open("match_benchmark_results.json", "w") as f:
        json.dump(summary, f, indent=4)
    print(f"\n📄 Results saved to 'match_benchmark_results.json'", flush=True)

if __name__ == "__main__":
    run_benchmark()
|
backend/src/embeddings/profile_entities_bench.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
import time
|
| 4 |
+
import numpy as np
|
| 5 |
+
import json
|
| 6 |
+
|
| 7 |
+
# Add backend to path
|
| 8 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../..')))
|
| 9 |
+
|
| 10 |
+
from backend.src.embeddings.local_embedder import generate_embedding, generate_list_embedding
|
| 11 |
+
|
| 12 |
+
def generate_structured_profiles(num_samples=50):
    """Create deterministic synthetic candidate profiles split into entity fields.

    Profiles rotate round-robin through five tech domains and five seniority
    levels. Skill subsets (5-8 per profile) are drawn with a per-profile numpy
    seed, so the output is fully reproducible across runs.
    """
    print(f"Generating {num_samples} structured synthetic profiles...")

    domains = [
        ("Frontend", ["React", "JavaScript", "Tailwind", "CSS", "HTML", "Redux", "TypeScript", "Jest"]),
        ("Backend", ["Python", "FastAPI", "PostgreSQL", "Docker", "AWS", "Linux", "Redis", "Kafka"]),
        ("Data Science", ["Python", "Pandas", "PyTorch", "SQL", "Machine Learning", "NLP", "TensorFlow", "R"]),
        ("DevOps", ["Kubernetes", "Docker", "Terraform", "CI/CD", "Jenkins", "AWS", "Bash", "Ansible"]),
        ("Mobile", ["Swift", "Kotlin", "React Native", "Flutter", "iOS", "Android", "Firebase", "SQLite"])
    ]
    levels = ["Junior", "Mid-Level", "Senior", "Lead", "Principal"]

    profiles = []
    for idx in range(num_samples):
        domain_name, domain_skills = domains[idx % len(domains)]
        level = levels[idx % len(levels)]

        # Seed per profile so the random 5-8 skill subset is reproducible.
        np.random.seed(idx)
        skills_subset = list(np.random.choice(domain_skills, size=np.random.randint(5, 9), replace=False))

        profiles.append({
            "profile_id": f"cand_{idx+1}_{domain_name.lower()}",
            "headline": f"{level} {domain_name} Engineer",
            "summary": f"Dedicated {level} {domain_name} professional with a proven track record of building scalable systems and working in agile environments. Passionate about clean code and modern architectures.",
            "skills": skills_subset,
            "experience": [
                f"{level} Engineer at TechCorp: Spearheaded the migration to cloud infrastructure and improved system performance by 40%.",
                f"Software Developer at Startup Inc: Developed RESTful APIs and collaborated with the frontend team to deliver features.",
                f"Intern at Legacy Systems: Assisted in maintaining codebases and writing unit tests."
            ]
        })
    return profiles
|
| 47 |
+
|
| 48 |
+
def profile_entities_scaled():
    """Benchmark per-entity embedding latency across N synthetic profiles.

    Times generate_embedding / generate_list_embedding separately for each
    profile entity (headline, summary, skills, experience), mirroring the
    production ingestion path, then reports mean/std latency per entity and
    overall throughput. Results are printed and written to
    'entity_benchmark_scaled_results.txt'.
    """
    num_samples = 50
    profiles = generate_structured_profiles(num_samples)

    print(f"\n🚀 Starting Entity-to-Embedding Efficiency Benchmark (N={num_samples})...")

    # Per-entity latency samples, in milliseconds.
    summary_times = []
    headline_times = []
    skills_times = []
    exp_times = []
    total_times = []

    for i, p in enumerate(profiles):
        start_total = time.time()

        # 1. Profile Headline (single short string)
        start = time.time()
        generate_embedding(p["headline"])
        headline_times.append((time.time() - start) * 1000)

        # 2. Profile Summary (longer free text)
        start = time.time()
        generate_embedding(p["summary"])
        summary_times.append((time.time() - start) * 1000)

        # 3. Profile Skills (Batch)
        start = time.time()
        generate_list_embedding(p["skills"])
        skills_times.append((time.time() - start) * 1000)

        # 4. Profile Experience (Batch)
        start = time.time()
        generate_list_embedding(p["experience"])
        exp_times.append((time.time() - start) * 1000)

        # Total wall-clock per profile
        total_times.append((time.time() - start_total) * 1000)

        if (i + 1) % 10 == 0:
            print(f" -> Processed {i + 1}/{num_samples} profiles...")

    # Calculate statistics.
    # FIX: first column width is 18 (was 15) so 'Experience (List)' — 17 chars —
    # pads correctly; the old width broke the table alignment for that row.
    col = 18
    results = [
        "IRIS Entity-to-Embedding Efficiency Results (Scaled)",
        f"Total Profiles Evaluated: {num_samples}",
        "-" * 60,
        f"{'Entity Type':<{col}} | {'Mean Latency (ms)':<20} | {'Std Dev (ms)':<15}",
        "-" * 60,
        f"{'Headline':<{col}} | {np.mean(headline_times):<20.2f} | {np.std(headline_times):<15.2f}",
        f"{'Summary':<{col}} | {np.mean(summary_times):<20.2f} | {np.std(summary_times):<15.2f}",
        f"{'Skills (List)':<{col}} | {np.mean(skills_times):<20.2f} | {np.std(skills_times):<15.2f}",
        f"{'Experience (List)':<{col}} | {np.mean(exp_times):<20.2f} | {np.std(exp_times):<15.2f}",
        "-" * 60,
        f"MEAN TOTAL PER PROFILE: {np.mean(total_times):.2f} ms",
        f"Average Throughput: {1000 / np.mean(total_times):.3f} profiles/sec"
    ]

    output_text = "\n".join(results)
    print("\n" + output_text)

    with open("entity_benchmark_scaled_results.txt", "w") as f:
        f.write(output_text)

    print("\n📄 Results saved to 'entity_benchmark_scaled_results.txt'.")

if __name__ == "__main__":
    profile_entities_scaled()
|
backend/src/matching/similarity.py
CHANGED
|
@@ -3,13 +3,25 @@ import numpy as np
|
|
| 3 |
from typing import Dict, Any, List
|
| 4 |
from supabase import Client
|
| 5 |
|
| 6 |
-
def cosine_similarity(v1:
|
| 7 |
-
"""Calculates cosine similarity between two vectors."""
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
return 0.0
|
| 10 |
|
| 11 |
-
a = np.array(
|
| 12 |
-
b = np.array(
|
| 13 |
|
| 14 |
# Check if vectors are zero vectors
|
| 15 |
if np.all(a == 0) or np.all(b == 0):
|
|
@@ -51,27 +63,37 @@ async def calculate_granular_match_score(client: Client, candidate_id: str, job_
|
|
| 51 |
print(f"❌ Database error in match score: {e}")
|
| 52 |
return {"total_score": 0, "breakdown": {}, "error": str(e)}
|
| 53 |
|
| 54 |
-
# 2. Define Weights
|
| 55 |
-
# These could eventually be user-defined
|
| 56 |
WEIGHTS = {
|
| 57 |
-
"skills": 0.35,
|
| 58 |
"technical_skills": 0.35,
|
| 59 |
"experience": 0.20,
|
|
|
|
|
|
|
|
|
|
| 60 |
"certifications": 0.10
|
| 61 |
}
|
| 62 |
|
| 63 |
# 3. Calculate Individual Similarities
|
| 64 |
scores = {}
|
| 65 |
|
| 66 |
-
#
|
| 67 |
-
scores["skills"] = cosine_similarity(profile_emb.get("skills"), job_emb.get("skills"))
|
| 68 |
scores["technical_skills"] = cosine_similarity(profile_emb.get("technical_skills"), job_emb.get("technical_skills"))
|
| 69 |
|
| 70 |
-
# Experience
|
| 71 |
-
scores["experience"] = cosine_similarity(profile_emb.get("experience"), job_emb.get("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
# Certifications
|
| 74 |
-
|
|
|
|
| 75 |
|
| 76 |
# 4. Calculate Weighted Total
|
| 77 |
total_score = 0
|
|
@@ -79,12 +101,13 @@ async def calculate_granular_match_score(client: Client, candidate_id: str, job_
|
|
| 79 |
|
| 80 |
for key, weight in WEIGHTS.items():
|
| 81 |
if scores.get(key) is not None:
|
| 82 |
-
|
|
|
|
| 83 |
available_weight += weight
|
| 84 |
|
| 85 |
-
# Normalize
|
| 86 |
if available_weight > 0:
|
| 87 |
-
final_score =
|
| 88 |
else:
|
| 89 |
final_score = 0
|
| 90 |
|
|
|
|
| 3 |
from typing import Dict, Any, List
|
| 4 |
from supabase import Client
|
| 5 |
|
| 6 |
+
def cosine_similarity(v1: Any, v2: Any) -> float:
|
| 7 |
+
"""Calculates cosine similarity between two vectors, handling both lists and pgvector strings."""
|
| 8 |
+
def parse_vector(v):
|
| 9 |
+
if isinstance(v, str):
|
| 10 |
+
try:
|
| 11 |
+
# Remove brackets and split by comma
|
| 12 |
+
return [float(x.strip()) for x in v.strip('[]').split(',') if x.strip()]
|
| 13 |
+
except Exception:
|
| 14 |
+
return []
|
| 15 |
+
return v if isinstance(v, list) else []
|
| 16 |
+
|
| 17 |
+
vec1 = parse_vector(v1)
|
| 18 |
+
vec2 = parse_vector(v2)
|
| 19 |
+
|
| 20 |
+
if not vec1 or not vec2 or len(vec1) != len(vec2):
|
| 21 |
return 0.0
|
| 22 |
|
| 23 |
+
a = np.array(vec1)
|
| 24 |
+
b = np.array(vec2)
|
| 25 |
|
| 26 |
# Check if vectors are zero vectors
|
| 27 |
if np.all(a == 0) or np.all(b == 0):
|
|
|
|
| 63 |
print(f"❌ Database error in match score: {e}")
|
| 64 |
return {"total_score": 0, "breakdown": {}, "error": str(e)}
|
| 65 |
|
| 66 |
+
# 2. Define Weights (Matching SQL function public.match_profile_job)
|
|
|
|
| 67 |
WEIGHTS = {
|
|
|
|
| 68 |
"technical_skills": 0.35,
|
| 69 |
"experience": 0.20,
|
| 70 |
+
"projects": 0.15,
|
| 71 |
+
"skills": 0.10,
|
| 72 |
+
"education": 0.10,
|
| 73 |
"certifications": 0.10
|
| 74 |
}
|
| 75 |
|
| 76 |
# 3. Calculate Individual Similarities
|
| 77 |
scores = {}
|
| 78 |
|
| 79 |
+
# Technical Skills
|
|
|
|
| 80 |
scores["technical_skills"] = cosine_similarity(profile_emb.get("technical_skills"), job_emb.get("technical_skills"))
|
| 81 |
|
| 82 |
+
# Experience
|
| 83 |
+
scores["experience"] = cosine_similarity(profile_emb.get("experience"), job_emb.get("work_experience"))
|
| 84 |
+
|
| 85 |
+
# Projects (Compare profile projects vs job technical skills)
|
| 86 |
+
scores["projects"] = cosine_similarity(profile_emb.get("projects"), job_emb.get("technical_skills"))
|
| 87 |
+
|
| 88 |
+
# Skills
|
| 89 |
+
scores["skills"] = cosine_similarity(profile_emb.get("skills"), job_emb.get("skills"))
|
| 90 |
+
|
| 91 |
+
# Education
|
| 92 |
+
scores["education"] = cosine_similarity(profile_emb.get("education"), job_emb.get("education"))
|
| 93 |
|
| 94 |
+
# Certifications (Compare profile certs vs job technical skills or skills)
|
| 95 |
+
job_target = job_emb.get("technical_skills") if job_emb.get("technical_skills") else job_emb.get("skills")
|
| 96 |
+
scores["certifications"] = cosine_similarity(profile_emb.get("certifications"), job_target)
|
| 97 |
|
| 98 |
# 4. Calculate Weighted Total
|
| 99 |
total_score = 0
|
|
|
|
| 101 |
|
| 102 |
for key, weight in WEIGHTS.items():
|
| 103 |
if scores.get(key) is not None:
|
| 104 |
+
# Scale to 100 like SQL
|
| 105 |
+
total_score += (scores[key] * 100) * weight
|
| 106 |
available_weight += weight
|
| 107 |
|
| 108 |
+
# Normalize
|
| 109 |
if available_weight > 0:
|
| 110 |
+
final_score = total_score / available_weight
|
| 111 |
else:
|
| 112 |
final_score = 0
|
| 113 |
|
backend/src/services/clustering_service.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import numpy as np
|
| 3 |
+
from sklearn.cluster import KMeans
|
| 4 |
+
from typing import List, Dict, Any
|
| 5 |
+
from google import genai
|
| 6 |
+
import google.genai.types as types
|
| 7 |
+
from supabase import create_client, Client
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
|
| 10 |
+
# Load environment variables
|
| 11 |
+
load_dotenv()
|
| 12 |
+
|
| 13 |
+
class ClusteringService:
    """Groups candidate profiles into labelled talent pools.

    Pipeline: fetch pgvector embeddings from Supabase, K-Means cluster them,
    ask Gemini for a human-readable label per cluster, then persist each
    label back onto the corresponding profile row.
    """

    def __init__(self):
        url = os.environ.get("SUPABASE_URL")
        key = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
        self.client: Client = create_client(url, key)
        self.gemini_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

    def fetch_all_embeddings(self) -> List[Dict[str, Any]]:
        """Fetch IDs and a representative embedding for all profiles.

        'technical_skills' serves as the single representative vector for
        clustering; the other per-entity embeddings are ignored here.
        """
        print("🔍 Fetching profile embeddings...")
        resp = self.client.table("profile_embeddings").select("id, technical_skills").execute()
        return resp.data

    def perform_clustering(self, data: List[Dict[str, Any]], n_clusters: int = 5):
        """Run K-Means over the embeddings.

        Returns a list of {"id": <profile id>, "cluster": <int>} entries, or
        [] when there is nothing usable to cluster.
        """
        if not data:
            print("⚠️ No data to cluster.")
            return []

        import json

        X = []
        ids = []
        for item in data:
            raw_vec = item.get("technical_skills")
            if not raw_vec:
                continue
            try:
                # Some versions of postgrest return vectors as strings like '[0.1, 0.2]'
                vec = json.loads(raw_vec) if isinstance(raw_vec, str) else raw_vec
                X.append(vec)
                ids.append(item["id"])
            except Exception as e:
                print(f"⚠️ Failed to parse embedding for {item['id']}: {e}")

        # FIX: if every row was missing or unparsable, X is empty and
        # KMeans.fit_predict would raise — bail out gracefully instead.
        if not X:
            print("⚠️ No usable embeddings to cluster.")
            return []

        # Never ask for more clusters than we have points.
        if len(X) < n_clusters:
            n_clusters = max(1, len(X))

        print(f"🤖 Performing K-Means clustering (K={n_clusters})...")
        kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
        labels = kmeans.fit_predict(X)

        return [{"id": ids[i], "cluster": int(labels[i])} for i in range(len(ids))]

    def generate_labels_for_clusters(self, clustered_data: List[Dict[str, Any]]) -> Dict[int, str]:
        """Generate a human-readable job-title label per cluster using Gemini.

        Up to 5 member profiles per cluster are sampled as prompt context.
        Retries with exponential backoff; falls back to 'Unknown Group' when
        all attempts fail so the pipeline never aborts on a labeling error.
        """
        import time

        cluster_groups: Dict[int, List[str]] = {}
        for item in clustered_data:
            cluster_groups.setdefault(item["cluster"], []).append(item["id"])

        labels: Dict[int, str] = {}
        for cluster_id, user_ids in cluster_groups.items():
            # Fetch sample details for these users to describe the cluster
            sample_ids = user_ids[:5]
            profiles_resp = self.client.table("profiles").select("headline, technical_skills").in_("id", sample_ids).execute()

            sample_text = "\n".join([
                f"- {p.get('headline')} (Skills: {p.get('technical_skills')})"
                for p in profiles_resp.data
            ])

            prompt = f"""
            You are an expert HR Talent Acquisition Specialist.
            Analyze the following representative professional profiles from a talent pool and provide a perfect, professional job title that best encapsulates the entire group.

            CRITERIA:
            - Concise: Exactly 2-4 words.
            - Professional: Use industry-standard terminology (e.g., "Full Stack Engineer", "DevOps Architect").
            - Accurate: Reflect the common denominator in seniority and technical domain.
            - Formatting: Return ONLY the title string, no quotes, no extra text.

            REPRESENTATIVE PROFILES:
            {sample_text}

            PERFECT JOB TITLE:
            """

            max_retries = 3
            label = "Unknown Group"

            for attempt in range(max_retries):
                try:
                    # temperature=0 for deterministic, repeatable labels
                    response = self.gemini_client.models.generate_content(
                        model="gemini-2.5-flash-lite",
                        contents=prompt,
                        config=types.GenerateContentConfig(temperature=0)
                    )
                    label = response.text.strip().replace('"', '')
                    break
                except Exception as e:
                    if attempt < max_retries - 1:
                        wait = 2 ** (attempt + 1)  # exponential backoff: 2s, 4s
                        print(f"⚠️ Labeling failed for Cluster {cluster_id}. Retrying in {wait}s... ({e})")
                        time.sleep(wait)
                    else:
                        print(f"❌ Labeling failed for Cluster {cluster_id} after {max_retries} attempts.")

            labels[cluster_id] = label
            print(f"✅ Cluster {cluster_id} Label: {label}")
            time.sleep(1)  # Small pause between clusters (rate-limit friendliness)

        return labels

    def update_database_with_labels(self, clustered_data: List[Dict[str, Any]], cluster_labels: Dict[int, str]):
        """Persist each profile's cluster label onto the profiles table."""
        print("💾 Updating database with cluster labels...")
        for item in clustered_data:
            # NOTE: one UPDATE per profile — fine at current scale; batch if it grows.
            user_id = item["id"]
            label = cluster_labels[item["cluster"]]
            self.client.table("profiles").update({"cluster_label": label}).eq("id", user_id).execute()
        print("✨ Database successfully updated.")

    def run_clustering_pipeline(self, n_clusters: int = 5):
        """Orchestrate the full pipeline: fetch -> cluster -> label -> persist."""
        data = self.fetch_all_embeddings()
        clustered_results = self.perform_clustering(data, n_clusters)
        if not clustered_results:
            return

        labels = self.generate_labels_for_clusters(clustered_results)
        self.update_database_with_labels(clustered_results, labels)

if __name__ == "__main__":
    service = ClusteringService()
    service.run_clustering_pipeline(n_clusters=5)
|
backend/src/services/test_clustering.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
# Add backend/src to path
|
| 5 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
|
| 6 |
+
|
| 7 |
+
from src.services.clustering_service import ClusteringService
|
| 8 |
+
|
| 9 |
+
def test_clustering_pipeline():
    """Smoke-test: run the full clustering pipeline and report the outcome."""
    print("🚀 Starting Clustering Pipeline Test...")
    runner = ClusteringService()
    try:
        # Five clusters gives a reasonably granular grouping for the test run.
        runner.run_clustering_pipeline(n_clusters=5)
    except Exception as exc:
        print(f"❌ Pipeline test failed: {exc}")
    else:
        print("✅ Pipeline test completed successfully.")

if __name__ == "__main__":
    test_clustering_pipeline()
|
backend/src/services/verify_labels.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
import os
from collections import Counter
from supabase import create_client, Client
from dotenv import load_dotenv

# Add backend/src to path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))

# Load environment variables
load_dotenv()

def verify_labels():
    """Print a sample of labelled profiles and per-cluster candidate counts.

    Reads Supabase credentials from the environment; fails with a clear
    message (instead of a cryptic client error) when they are missing.
    """
    print("🔍 Fetching generated cluster labels from database...")
    url = os.environ.get("SUPABASE_URL")
    key = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
    if not url or not key:
        print("⚠️ SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY/SUPABASE_KEY must be set in the environment.")
        return
    client: Client = create_client(url, key)

    resp = client.table("profiles").select("full_name, headline, cluster_label").not_.is_("cluster_label", "null").order("cluster_label").execute()

    if not resp or not hasattr(resp, 'data') or resp.data is None:
        print("⚠️ No cluster labels found or database error.")
        return

    print(f"\n{'Name':<25} | {'Original Headline':<35} | {'Cluster Label'}")
    print("-" * 85)
    for p in resp.data[:15]:  # Show first 15
        name = (p.get('full_name') or "Unknown")[:25]
        headline = (p.get('headline') or "N/A")[:35]
        label = p.get('cluster_label') or "Unknown"
        print(f"{name:<25} | {headline:<35} | {label}")

    # Show distinct labels with counts. Counter is a single O(n) pass,
    # replacing the previous list.count(...) inside a loop (O(n^2)).
    label_counts = Counter(p.get('cluster_label') for p in resp.data if p.get('cluster_label'))
    print("\n📦 Distinct Talent Pools (Clusters):")
    for idx, label in enumerate(sorted(label_counts), 1):
        print(f"{idx}. {label} ({label_counts[label]} candidates)")

if __name__ == "__main__":
    verify_labels()
|
backend/supabase_ingest.py
CHANGED
|
@@ -57,7 +57,7 @@ if SUPABASE_URL and SUPABASE_KEY:
|
|
| 57 |
else:
|
| 58 |
print("⚠️ Warning: Supabase Credentials not found in environment. Only library functions will fail if called without a client.")
|
| 59 |
|
| 60 |
-
ALLOWED_EXTENSIONS = {".pdf", ".docx"}
|
| 61 |
|
| 62 |
# ---------------------------------------------------------------------
|
| 63 |
# UTILS
|
|
@@ -212,15 +212,15 @@ def upsert_profile(client, payload: Dict[str, Any]):
|
|
| 212 |
# UNIFIED PROCESSING FUNCTION (Called by API and Main)
|
| 213 |
# ---------------------------------------------------------------------
|
| 214 |
|
| 215 |
-
def process_resume(client, user_id: str, file_path: str, temp_dir: str = "data/resumes/raw") -> Dict[str, Any]:
|
| 216 |
"""
|
| 217 |
Downloads, extracts, and upserts a resume.
|
| 218 |
Used by both the API (real-time) and the main script (batch).
|
| 219 |
"""
|
| 220 |
try:
|
| 221 |
# 1. Download
|
| 222 |
-
print(f"⬇️ Downloading {file_path}...")
|
| 223 |
-
local_path = download_object(client,
|
| 224 |
|
| 225 |
# 2. Extract
|
| 226 |
print("🧠 Sending to Gemini...")
|
|
@@ -312,6 +312,11 @@ def main():
|
|
| 312 |
except Exception as e:
|
| 313 |
print(f" ⚠️ Embedding generation failed (non-critical): {e}")
|
| 314 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
except Exception as e:
|
| 316 |
print(f" ❌ Pipeline failed for this file: {e}")
|
| 317 |
|
|
|
|
| 57 |
else:
|
| 58 |
print("⚠️ Warning: Supabase Credentials not found in environment. Only library functions will fail if called without a client.")
|
| 59 |
|
| 60 |
+
ALLOWED_EXTENSIONS = {".pdf", ".docx", ".doc"}
|
| 61 |
|
| 62 |
# ---------------------------------------------------------------------
|
| 63 |
# UTILS
|
|
|
|
| 212 |
# UNIFIED PROCESSING FUNCTION (Called by API and Main)
|
| 213 |
# ---------------------------------------------------------------------
|
| 214 |
|
| 215 |
+
def process_resume(client, user_id: str, file_path: str, bucket: str = "resume", temp_dir: str = "data/resumes/raw") -> Dict[str, Any]:
|
| 216 |
"""
|
| 217 |
Downloads, extracts, and upserts a resume.
|
| 218 |
Used by both the API (real-time) and the main script (batch).
|
| 219 |
"""
|
| 220 |
try:
|
| 221 |
# 1. Download
|
| 222 |
+
print(f"⬇️ Downloading {file_path} from bucket '{bucket}'...")
|
| 223 |
+
local_path = download_object(client, bucket, file_path, temp_dir)
|
| 224 |
|
| 225 |
# 2. Extract
|
| 226 |
print("🧠 Sending to Gemini...")
|
|
|
|
| 312 |
except Exception as e:
|
| 313 |
print(f" ⚠️ Embedding generation failed (non-critical): {e}")
|
| 314 |
|
| 315 |
+
# 8. Cleanup
|
| 316 |
+
if os.path.exists(local_path):
|
| 317 |
+
os.remove(local_path)
|
| 318 |
+
print(" 🗑️ Cleaned up temporary file.")
|
| 319 |
+
|
| 320 |
except Exception as e:
|
| 321 |
print(f" ❌ Pipeline failed for this file: {e}")
|
| 322 |
|
backend/test_ingest_output.txt
ADDED
|
File without changes
|
debug_log.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Testing 896d6c15-2d98-4435-9869-0f11e4db48bd against 45bcca29-4e12-45bf-97d4-0b77ff55472f
|
| 2 |
+
|
| 3 |
+
--- Profile Lengths ---
|
| 4 |
+
skills: 1024
|
| 5 |
+
technical_skills: 1024
|
| 6 |
+
experience: 1024
|
| 7 |
+
certifications: 1024
|
| 8 |
+
|
| 9 |
+
--- Job Lengths ---
|
| 10 |
+
skills: 1024
|
| 11 |
+
technical_skills: 1024
|
| 12 |
+
work_experience: 1024
|
| 13 |
+
certifications: None
|
| 14 |
+
|
| 15 |
+
Result: {"total_score": 80.5, "breakdown": {"technical_skills": 95.8, "experience": 62.7, "projects": 93.3, "skills": 75.5, "education": 59.6, "certifications": 69.1}, "weights": {"technical_skills": 0.35, "experience": 0.2, "projects": 0.15, "skills": 0.1, "education": 0.1, "certifications": 0.1}}
|
entity_benchmark_scaled_results.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
IRIS Entity-to-Embedding Efficiency Results (Scaled)
|
| 2 |
+
Total Profiles Evaluated: 50
|
| 3 |
+
------------------------------------------------------------
|
| 4 |
+
Entity Type | Mean Latency (ms) | Std Dev (ms)
|
| 5 |
+
------------------------------------------------------------
|
| 6 |
+
Headline | 965.78 | 2969.16
|
| 7 |
+
Summary | 785.70 | 141.60
|
| 8 |
+
Skills (List) | 780.01 | 160.76
|
| 9 |
+
Experience (List)| 1005.30 | 185.11
|
| 10 |
+
------------------------------------------------------------
|
| 11 |
+
MEAN TOTAL PER PROFILE: 3536.80 ms
|
| 12 |
+
Average Throughput: 0.283 profiles/sec
|
experimental_results.tex
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
\section{Experimental Results}
|
| 2 |
+
\label{sec:experimental_results}
|
| 3 |
+
|
| 4 |
+
In this section, we present the empirical evaluation of the IRIS system, focusing on two key dimensions: computational efficiency (latency and throughput) and retrieval accuracy.
|
| 5 |
+
|
| 6 |
+
\subsection{Computational Efficiency}
|
| 7 |
+
The efficiency of the entity extraction and embedding pipeline was evaluated using a dataset of 50 candidate profiles. The pipeline consists of extracting specific entities—Headline, Summary, Skills, and Experience—and generating their corresponding embeddings using the BGE-M3 model.
|
| 8 |
+
|
| 9 |
+
Table~\ref{tab:latency_results} summarizes the mean latency and standard deviation for each entity type.
|
| 10 |
+
|
| 11 |
+
\begin{table}[h]
|
| 12 |
+
\centering
|
| 13 |
+
\caption{Mean Latency and Standard Deviation per Entity Extraction (N=50)}
|
| 14 |
+
\label{tab:latency_results}
|
| 15 |
+
\begin{tabular}{lrr}
|
| 16 |
+
\hline
|
| 17 |
+
\textbf{Entity Type} & \textbf{Mean Latency (ms)} & \textbf{Std. Dev. (ms)} \\ \hline
|
| 18 |
+
Headline & 965.78 & 2969.16 \\
|
| 19 |
+
Summary & 785.70 & 141.60 \\
|
| 20 |
+
Skills (List) & 780.01 & 160.76 \\
|
| 21 |
+
Experience (List) & 1005.30 & 185.11 \\ \hline
|
| 22 |
+
\textbf{Total per Profile} & \textbf{3536.80} & -- \\ \hline
|
| 23 |
+
\end{tabular}
|
| 24 |
+
\end{table}
|
| 25 |
+
|
| 26 |
+
The average total processing time per profile is approximately 3.54 seconds, resulting in a throughput of \textbf{0.283 profiles per second}. While the Headline extraction shows high variance, possibly due to network latency or cold-start issues in the embedding service, the overall pipeline maintains a consistent performance suitable for near-real-time recruitment tasks.
|
| 27 |
+
|
| 28 |
+
\subsection{Retrieval Performance}
|
| 29 |
+
We compared the proposed IRIS matching methods against standard baselines using Mean Reciprocal Rank (MRR) and Recall@K ($R@k$). The evaluation included:
|
| 30 |
+
\begin{itemize}
|
| 31 |
+
\item \textbf{Jaccard Baseline}: A keyword-based overlap method.
|
| 32 |
+
\item \textbf{BERT Flattened}: Dense retrieval using BERT embeddings on concatenated profile text.
|
| 33 |
+
\item \textbf{BGE Flattened}: Dense retrieval using BGE-M3 embeddings on concatenated profile text.
|
| 34 |
+
\item \textbf{BGE Granular Weighted}: Our proposed method using weighted cosine similarity across specific entities.
|
| 35 |
+
\end{itemize}
|
| 36 |
+
|
| 37 |
+
Table~\ref{tab:retrieval_results} presents the results of this comparison.
|
| 38 |
+
|
| 39 |
+
\begin{table}[h]
|
| 40 |
+
\centering
|
| 41 |
+
\caption{Comparison of Retrieval Accuracy Metrics}
|
| 42 |
+
\label{tab:retrieval_results}
|
| 43 |
+
\begin{tabular}{lccc}
|
| 44 |
+
\hline
|
| 45 |
+
\textbf{Method} & \textbf{MRR} & \textbf{R@1} & \textbf{R@3} \\ \hline
|
| 46 |
+
Jaccard Baseline & 0.0755 & 0.016 & 0.048 \\
BERT Flattened & 0.1689 & 0.048 & \textbf{0.144} \\
BGE Flattened & \textbf{0.1726} & \textbf{0.048} & \textbf{0.144} \\
BGE Granular Weighted & 0.0730 & 0.012 & 0.044 \\ \hline
|
| 50 |
+
\end{tabular}
|
| 51 |
+
\end{table}
|
| 52 |
+
|
| 53 |
+
The results indicate that the \textbf{BGE Flattened} approach achieves the highest MRR (0.1726) and Recall@1/Recall@3. Notably, the granular weighted approach currently underperforms compared to the flattened embedding methods, suggesting that the aggregation logic or weight distribution for specific entities requires further optimization.
|
match_benchmark_results.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Jaccard_Baseline": {
|
| 3 |
+
"mrr": 0.07552527033230824,
|
| 4 |
+
"r1": 0.016,
|
| 5 |
+
"r3": 0.048
|
| 6 |
+
},
|
| 7 |
+
"BERT_Flattened": {
|
| 8 |
+
"mrr": 0.1688751043476369,
|
| 9 |
+
"r1": 0.048,
|
| 10 |
+
"r3": 0.144
|
| 11 |
+
},
|
| 12 |
+
"BGE_Flattened": {
|
| 13 |
+
"mrr": 0.17255959067443694,
|
| 14 |
+
"r1": 0.048,
|
| 15 |
+
"r3": 0.144
|
| 16 |
+
},
|
| 17 |
+
"BGE_Granular_Weighted": {
|
| 18 |
+
"mrr": 0.07297651022436405,
|
| 19 |
+
"r1": 0.012,
|
| 20 |
+
"r3": 0.044
|
| 21 |
+
}
|
| 22 |
+
}
|
matching_analysis_report.md
ADDED
|
File without changes
|
quality_metrics_adversarial.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "N=200 Adversarial (Noise + Distractors)",
|
| 3 |
+
"mrr": 0.70625,
|
| 4 |
+
"recall_1": 0.525,
|
| 5 |
+
"recall_3": 0.775
|
| 6 |
+
}
|
schema_dump.txt
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
--- Profile Embeddings ---
|
| 2 |
+
- certifications
|
| 3 |
+
- created_at
|
| 4 |
+
- education
|
| 5 |
+
- experience
|
| 6 |
+
- headline
|
| 7 |
+
- id
|
| 8 |
+
- projects
|
| 9 |
+
- skills
|
| 10 |
+
- summary
|
| 11 |
+
- technical_skills
|
| 12 |
+
- updated_at
|
| 13 |
+
|
| 14 |
+
--- Job Embeddings ---
|
| 15 |
+
- created_at
|
| 16 |
+
- education
|
| 17 |
+
- job_id
|
| 18 |
+
- skills
|
| 19 |
+
- technical_skills
|
| 20 |
+
- tools
|
| 21 |
+
- updated_at
|
| 22 |
+
- work_experience
|
src/components/Admin/AdminLayout.jsx
CHANGED
|
@@ -1,23 +1,24 @@
|
|
| 1 |
import React from 'react';
|
| 2 |
import { motion } from 'framer-motion';
|
| 3 |
-
import { supabase } from '../../supabaseClient';
|
| 4 |
|
| 5 |
// --- Icons ---
|
| 6 |
-
const HomeIcon = () => (
|
| 7 |
-
const BriefcaseIcon = () => (
|
| 8 |
-
const MessageSquareIcon = () => (
|
| 9 |
// ✅ UPDATED: Complete, robust Settings Icon (Gear)
|
| 10 |
-
const SettingsIcon = () => (
|
| 11 |
<svg style={{ width: '24px', height: '24px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
|
| 12 |
<path d="M12.22 2h-.44a2 2 0 0 0-2 2v.18a2 2 0 0 1-1 1.73l-.43.25a2 2 0 0 1-2 0l-.15-.08a2 2 0 0 0-2.73.73l-.22.38a2 2 0 0 0 .73 2.73l.15.1a2 2 0 0 1 1 1.72v.51a2 2 0 0 1-1 1.74l-.15.09a2 2 0 0 0-.73 2.73l.22.38a2 2 0 0 0 2.73.73l.15-.08a2 2 0 0 1 2 0l.43.25a2 2 0 0 1 1 1.73V20a2 2 0 0 0 2 2h.44a2 2 0 0 0 2-2v-.18a2 2 0 0 1 1-1.73l.43-.25a2 2 0 0 1 2 0l.15.08a2 2 0 0 0 2.73-.73l.22-.38a2 2 0 0 0-.73-2.73l-.15-.1a2 2 0 0 1-1-1.72v-.51a2 2 0 0 1 1-1.74l.15-.09a2 2 0 0 0 .73-2.73l-.22-.38a2 2 0 0 0-2.73-.73l-.15.08a2 2 0 0 1-2 0l-.43-.25a2 2 0 0 1-1-1.73V4a2 2 0 0 0-2-2z"></path>
|
| 13 |
<circle cx="12" cy="12" r="3"></circle>
|
| 14 |
-
</svg>
|
| 15 |
);
|
| 16 |
-
const BriefcasePlusIcon = () => (
|
| 17 |
-
const
|
|
|
|
| 18 |
|
| 19 |
export default function AdminLayout({ children, activeTab, setActiveTab, onNavigate }) {
|
| 20 |
-
|
| 21 |
// Global Logout Handler
|
| 22 |
const handleLogout = async () => {
|
| 23 |
const { error } = await supabase.auth.signOut();
|
|
@@ -27,7 +28,7 @@ export default function AdminLayout({ children, activeTab, setActiveTab, onNavig
|
|
| 27 |
|
| 28 |
return (
|
| 29 |
<div style={{ height: '100vh', width: '100%', backgroundColor: '#020617', color: 'white', fontFamily: "'Montserrat', sans-serif", display: 'flex', position: 'relative', overflow: 'hidden' }}>
|
| 30 |
-
|
| 31 |
{/* Background Effects */}
|
| 32 |
<div style={{ position: 'fixed', top: 0, left: 0, right: 0, bottom: 0, zIndex: 0 }}>
|
| 33 |
<div style={{ position: 'absolute', borderRadius: '50%', filter: 'blur(80px)', opacity: 0.3, width: '400px', height: '400px', backgroundColor: '#EF4444', top: '-50px', left: '-100px' }}></div>
|
|
@@ -37,14 +38,15 @@ export default function AdminLayout({ children, activeTab, setActiveTab, onNavig
|
|
| 37 |
{/* Sidebar */}
|
| 38 |
<aside style={{ width: '100px', padding: '2rem 0', display: 'flex', flexDirection: 'column', alignItems: 'center', zIndex: 10 }}>
|
| 39 |
<div style={{ fontSize: '1.5rem', fontWeight: 'bold', color: '#EF4444', marginBottom: '2rem' }}>IRIS</div>
|
| 40 |
-
<nav style={{
|
| 41 |
-
display: 'flex', flexDirection: 'column', alignItems: 'center', gap: '1.5rem',
|
| 42 |
-
backgroundColor: 'rgba(239, 68, 68, 0.05)', border: '1px solid rgba(239, 68, 68, 0.2)',
|
| 43 |
-
borderRadius: '9999px', padding: '2rem 1rem'
|
| 44 |
}}>
|
| 45 |
<NavButton active={activeTab === 'dashboard'} onClick={() => setActiveTab('dashboard')} icon={<HomeIcon />} />
|
| 46 |
<NavButton active={activeTab === 'job-management'} onClick={() => setActiveTab('job-management')} icon={<BriefcasePlusIcon />} />
|
| 47 |
<NavButton active={activeTab === 'jobs'} onClick={() => setActiveTab('jobs')} icon={<BriefcaseIcon />} />
|
|
|
|
| 48 |
<NavButton active={activeTab === 'messages'} onClick={() => setActiveTab('messages')} icon={<MessageSquareIcon />} />
|
| 49 |
<NavButton active={activeTab === 'settings'} onClick={() => setActiveTab('settings')} icon={<SettingsIcon />} />
|
| 50 |
</nav>
|
|
@@ -52,26 +54,26 @@ export default function AdminLayout({ children, activeTab, setActiveTab, onNavig
|
|
| 52 |
|
| 53 |
{/* Main Content Area */}
|
| 54 |
<div style={{ flex: 1, padding: '2rem', overflowY: 'auto', height: '100vh', boxSizing: 'border-box', position: 'relative', zIndex: 1 }}>
|
| 55 |
-
|
| 56 |
{/* ✅ GLOBAL LOGOUT BUTTON - Updated Styles for Alignment */}
|
| 57 |
<div style={{ position: 'absolute', top: '2rem', right: '2rem', zIndex: 50 }}>
|
| 58 |
-
<motion.button
|
| 59 |
-
onClick={handleLogout}
|
| 60 |
-
whileHover={{ scale: 1.05 }}
|
| 61 |
-
whileTap={{ scale: 0.95 }}
|
| 62 |
-
style={{
|
| 63 |
-
backgroundColor: '#EF4444',
|
| 64 |
-
color: 'white',
|
| 65 |
-
display: 'flex',
|
| 66 |
-
alignItems: 'center',
|
| 67 |
justifyContent: 'center',
|
| 68 |
-
padding: '0.75rem 1.5rem',
|
| 69 |
-
borderRadius: '0.5rem',
|
| 70 |
-
fontWeight: 'bold',
|
| 71 |
-
cursor: 'pointer',
|
| 72 |
-
border: 'none',
|
| 73 |
// Matches the visual weight of "Post New Job"
|
| 74 |
-
minWidth: '160px'
|
| 75 |
}}
|
| 76 |
>
|
| 77 |
<LogoutIcon /> Logout
|
|
@@ -86,10 +88,10 @@ export default function AdminLayout({ children, activeTab, setActiveTab, onNavig
|
|
| 86 |
|
| 87 |
// Helper Component for Navigation Buttons
|
| 88 |
const NavButton = ({ active, onClick, icon }) => (
|
| 89 |
-
<motion.button
|
| 90 |
-
whileHover={{ scale: 1.1 }}
|
| 91 |
-
whileTap={{ scale: 0.9 }}
|
| 92 |
-
onClick={onClick}
|
| 93 |
style={{ background: 'none', border: 'none', color: active ? '#EF4444' : '#d1d5db', cursor: 'pointer' }}
|
| 94 |
>
|
| 95 |
{icon}
|
|
|
|
| 1 |
import React from 'react';
|
| 2 |
import { motion } from 'framer-motion';
|
| 3 |
+
import { supabase } from '../../supabaseClient';
|
| 4 |
|
| 5 |
// --- Icons ---
|
| 6 |
+
const HomeIcon = () => (<svg style={{ width: '24px', height: '24px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"><path d="M3 9l9-7 9 7v11a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2z"></path><polyline points="9 22 9 12 15 12 15 22"></polyline></svg>);
|
| 7 |
+
const BriefcaseIcon = () => (<svg style={{ width: '24px', height: '24px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"><rect x="2" y="7" width="20" height="14" rx="2" ry="2"></rect><path d="M16 21V5a2 2 0 0 0-2-2h-4a2 2 0 0 0-2 2v16"></path></svg>);
|
| 8 |
+
const MessageSquareIcon = () => (<svg style={{ width: '24px', height: '24px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"><path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"></path></svg>);
|
| 9 |
// ✅ UPDATED: Complete, robust Settings Icon (Gear)
|
| 10 |
+
const SettingsIcon = () => (
|
| 11 |
<svg style={{ width: '24px', height: '24px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
|
| 12 |
<path d="M12.22 2h-.44a2 2 0 0 0-2 2v.18a2 2 0 0 1-1 1.73l-.43.25a2 2 0 0 1-2 0l-.15-.08a2 2 0 0 0-2.73.73l-.22.38a2 2 0 0 0 .73 2.73l.15.1a2 2 0 0 1 1 1.72v.51a2 2 0 0 1-1 1.74l-.15.09a2 2 0 0 0-.73 2.73l.22.38a2 2 0 0 0 2.73.73l.15-.08a2 2 0 0 1 2 0l.43.25a2 2 0 0 1 1 1.73V20a2 2 0 0 0 2 2h.44a2 2 0 0 0 2-2v-.18a2 2 0 0 1 1-1.73l.43-.25a2 2 0 0 1 2 0l.15.08a2 2 0 0 0 2.73-.73l.22-.38a2 2 0 0 0-.73-2.73l-.15-.1a2 2 0 0 1-1-1.72v-.51a2 2 0 0 1 1-1.74l.15-.09a2 2 0 0 0 .73-2.73l-.22-.38a2 2 0 0 0-2.73-.73l-.15.08a2 2 0 0 1-2 0l-.43-.25a2 2 0 0 1-1-1.73V4a2 2 0 0 0-2-2z"></path>
|
| 13 |
<circle cx="12" cy="12" r="3"></circle>
|
| 14 |
+
</svg>
|
| 15 |
);
|
| 16 |
+
const BriefcasePlusIcon = () => (<svg style={{ width: '24px', height: '24px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"><rect x="2" y="7" width="20" height="14" rx="2" ry="2"></rect><path d="M16 21V5a2 2 0 0 0-2-2h-4a2 2 0 0 0-2 2v16"></path><line x1="12" y1="11" x2="12" y2="17"></line><line x1="9" y1="14" x2="15" y2="14"></line></svg>);
|
| 17 |
+
const ClustersIcon = () => (<svg style={{ width: '24px', height: '24px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"><circle cx="12" cy="12" r="3" /><circle cx="4" cy="6" r="2" /><circle cx="20" cy="6" r="2" /><circle cx="4" cy="18" r="2" /><circle cx="20" cy="18" r="2" /><line x1="12" y1="9" x2="5" y2="7" /><line x1="12" y1="9" x2="19" y2="7" /><line x1="12" y1="15" x2="5" y2="17" /><line x1="12" y1="15" x2="19" y2="17" /></svg>);
|
| 18 |
+
const LogoutIcon = () => (<svg style={{ width: '20px', height: '20px', marginRight: '8px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"><path d="M9 21H5a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2h4"></path><polyline points="16 17 21 12 16 7"></polyline><line x1="21" y1="12" x2="9" y2="12"></line></svg>);
|
| 19 |
|
| 20 |
export default function AdminLayout({ children, activeTab, setActiveTab, onNavigate }) {
|
| 21 |
+
|
| 22 |
// Global Logout Handler
|
| 23 |
const handleLogout = async () => {
|
| 24 |
const { error } = await supabase.auth.signOut();
|
|
|
|
| 28 |
|
| 29 |
return (
|
| 30 |
<div style={{ height: '100vh', width: '100%', backgroundColor: '#020617', color: 'white', fontFamily: "'Montserrat', sans-serif", display: 'flex', position: 'relative', overflow: 'hidden' }}>
|
| 31 |
+
|
| 32 |
{/* Background Effects */}
|
| 33 |
<div style={{ position: 'fixed', top: 0, left: 0, right: 0, bottom: 0, zIndex: 0 }}>
|
| 34 |
<div style={{ position: 'absolute', borderRadius: '50%', filter: 'blur(80px)', opacity: 0.3, width: '400px', height: '400px', backgroundColor: '#EF4444', top: '-50px', left: '-100px' }}></div>
|
|
|
|
| 38 |
{/* Sidebar */}
|
| 39 |
<aside style={{ width: '100px', padding: '2rem 0', display: 'flex', flexDirection: 'column', alignItems: 'center', zIndex: 10 }}>
|
| 40 |
<div style={{ fontSize: '1.5rem', fontWeight: 'bold', color: '#EF4444', marginBottom: '2rem' }}>IRIS</div>
|
| 41 |
+
<nav style={{
|
| 42 |
+
display: 'flex', flexDirection: 'column', alignItems: 'center', gap: '1.5rem',
|
| 43 |
+
backgroundColor: 'rgba(239, 68, 68, 0.05)', border: '1px solid rgba(239, 68, 68, 0.2)',
|
| 44 |
+
borderRadius: '9999px', padding: '2rem 1rem'
|
| 45 |
}}>
|
| 46 |
<NavButton active={activeTab === 'dashboard'} onClick={() => setActiveTab('dashboard')} icon={<HomeIcon />} />
|
| 47 |
<NavButton active={activeTab === 'job-management'} onClick={() => setActiveTab('job-management')} icon={<BriefcasePlusIcon />} />
|
| 48 |
<NavButton active={activeTab === 'jobs'} onClick={() => setActiveTab('jobs')} icon={<BriefcaseIcon />} />
|
| 49 |
+
<NavButton active={activeTab === 'clusters'} onClick={() => setActiveTab('clusters')} icon={<ClustersIcon />} />
|
| 50 |
<NavButton active={activeTab === 'messages'} onClick={() => setActiveTab('messages')} icon={<MessageSquareIcon />} />
|
| 51 |
<NavButton active={activeTab === 'settings'} onClick={() => setActiveTab('settings')} icon={<SettingsIcon />} />
|
| 52 |
</nav>
|
|
|
|
| 54 |
|
| 55 |
{/* Main Content Area */}
|
| 56 |
<div style={{ flex: 1, padding: '2rem', overflowY: 'auto', height: '100vh', boxSizing: 'border-box', position: 'relative', zIndex: 1 }}>
|
| 57 |
+
|
| 58 |
{/* ✅ GLOBAL LOGOUT BUTTON - Updated Styles for Alignment */}
|
| 59 |
<div style={{ position: 'absolute', top: '2rem', right: '2rem', zIndex: 50 }}>
|
| 60 |
+
<motion.button
|
| 61 |
+
onClick={handleLogout}
|
| 62 |
+
whileHover={{ scale: 1.05 }}
|
| 63 |
+
whileTap={{ scale: 0.95 }}
|
| 64 |
+
style={{
|
| 65 |
+
backgroundColor: '#EF4444',
|
| 66 |
+
color: 'white',
|
| 67 |
+
display: 'flex',
|
| 68 |
+
alignItems: 'center',
|
| 69 |
justifyContent: 'center',
|
| 70 |
+
padding: '0.75rem 1.5rem',
|
| 71 |
+
borderRadius: '0.5rem',
|
| 72 |
+
fontWeight: 'bold',
|
| 73 |
+
cursor: 'pointer',
|
| 74 |
+
border: 'none',
|
| 75 |
// Matches the visual weight of "Post New Job"
|
| 76 |
+
minWidth: '160px'
|
| 77 |
}}
|
| 78 |
>
|
| 79 |
<LogoutIcon /> Logout
|
|
|
|
| 88 |
|
| 89 |
// Helper Component for Navigation Buttons
|
| 90 |
const NavButton = ({ active, onClick, icon }) => (
|
| 91 |
+
<motion.button
|
| 92 |
+
whileHover={{ scale: 1.1 }}
|
| 93 |
+
whileTap={{ scale: 0.9 }}
|
| 94 |
+
onClick={onClick}
|
| 95 |
style={{ background: 'none', border: 'none', color: active ? '#EF4444' : '#d1d5db', cursor: 'pointer' }}
|
| 96 |
>
|
| 97 |
{icon}
|
src/components/Admin/TalentClusters.jsx
ADDED
|
@@ -0,0 +1,496 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import React, { useState, useEffect } from 'react';
|
| 2 |
+
import { motion, AnimatePresence } from 'framer-motion';
|
| 3 |
+
import { supabase } from '../../supabaseClient';
|
| 4 |
+
import FullProfileOverlay from '../FullProfileOverlay';
|
| 5 |
+
|
| 6 |
+
// ─── Icons ───────────────────────────────────────────────────────────────────
|
| 7 |
+
const ClusterIcon = () => (
|
| 8 |
+
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
|
| 9 |
+
<circle cx="12" cy="12" r="3" /><circle cx="4" cy="6" r="3" /><circle cx="20" cy="6" r="3" />
|
| 10 |
+
<circle cx="4" cy="18" r="3" /><circle cx="20" cy="18" r="3" />
|
| 11 |
+
<line x1="12" y1="9" x2="4" y2="7" /><line x1="12" y1="9" x2="20" y2="7" />
|
| 12 |
+
<line x1="12" y1="15" x2="4" y2="17" /><line x1="12" y1="15" x2="20" y2="17" />
|
| 13 |
+
</svg>
|
| 14 |
+
);
|
| 15 |
+
|
| 16 |
+
const UsersIcon = () => (
|
| 17 |
+
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
|
| 18 |
+
<path d="M17 21v-2a4 4 0 0 0-4-4H5a4 4 0 0 0-4 4v2" /><circle cx="9" cy="7" r="4" />
|
| 19 |
+
<path d="M23 21v-2a4 4 0 0 0-3-3.87" /><path d="M16 3.13a4 4 0 0 1 0 7.75" />
|
| 20 |
+
</svg>
|
| 21 |
+
);
|
| 22 |
+
|
| 23 |
+
const SearchIcon = () => (
|
| 24 |
+
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
|
| 25 |
+
<circle cx="11" cy="11" r="8" /><line x1="21" y1="21" x2="16.65" y2="16.65" />
|
| 26 |
+
</svg>
|
| 27 |
+
);
|
| 28 |
+
|
| 29 |
+
const ChevronDown = ({ open }) => (
|
| 30 |
+
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2.5"
|
| 31 |
+
style={{ transform: open ? 'rotate(180deg)' : 'rotate(0deg)', transition: 'transform 0.3s ease' }}>
|
| 32 |
+
<polyline points="6 9 12 15 18 9" />
|
| 33 |
+
</svg>
|
| 34 |
+
);
|
| 35 |
+
|
| 36 |
+
const XIcon = () => (
|
| 37 |
+
<svg width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
|
| 38 |
+
<line x1="18" y1="6" x2="6" y2="18" /><line x1="6" y1="6" x2="18" y2="18" />
|
| 39 |
+
</svg>
|
| 40 |
+
);
|
| 41 |
+
|
| 42 |
+
// ─── Cluster colour palette ───────────────────────────────────────────────────
|
| 43 |
+
const CLUSTER_COLORS = [
  { accent: '#EF4444', glow: 'rgba(239,68,68,0.15)', border: 'rgba(239,68,68,0.3)' },
  { accent: '#8B5CF6', glow: 'rgba(139,92,246,0.15)', border: 'rgba(139,92,246,0.3)' },
  { accent: '#06B6D4', glow: 'rgba(6,182,212,0.15)', border: 'rgba(6,182,212,0.3)' },
  { accent: '#10B981', glow: 'rgba(16,185,129,0.15)', border: 'rgba(16,185,129,0.3)' },
  { accent: '#F59E0B', glow: 'rgba(245,158,11,0.15)', border: 'rgba(245,158,11,0.3)' },
  { accent: '#EC4899', glow: 'rgba(236,72,153,0.15)', border: 'rgba(236,72,153,0.3)' },
];

// Map a cluster index to a palette entry, cycling through the palette.
// Uses a floored modulo so a negative index (e.g. a sentinel cluster id
// of -1 from noise points) still resolves to a valid colour instead of
// `undefined` — JS `%` takes the sign of the dividend.
const getColor = (idx) =>
  CLUSTER_COLORS[((idx % CLUSTER_COLORS.length) + CLUSTER_COLORS.length) % CLUSTER_COLORS.length];
|
| 53 |
+
|
| 54 |
+
// ─── Profile Card ─────────────────────────────────────────────────────────────
|
| 55 |
+
const ProfileCard = ({ profile, accent, onView }) => {
|
| 56 |
+
const [hovered, setHovered] = useState(false);
|
| 57 |
+
const skills = Array.isArray(profile.technical_skills)
|
| 58 |
+
? profile.technical_skills.slice(0, 4)
|
| 59 |
+
: typeof profile.technical_skills === 'string'
|
| 60 |
+
? profile.technical_skills.split(',').slice(0, 4).map(s => s.trim())
|
| 61 |
+
: [];
|
| 62 |
+
|
| 63 |
+
return (
|
| 64 |
+
<motion.div
|
| 65 |
+
onMouseEnter={() => setHovered(true)}
|
| 66 |
+
onMouseLeave={() => setHovered(false)}
|
| 67 |
+
whileHover={{ y: -4, scale: 1.01 }}
|
| 68 |
+
onClick={() => onView(profile)}
|
| 69 |
+
style={{
|
| 70 |
+
backgroundColor: hovered ? 'rgba(255,255,255,0.06)' : 'rgba(255,255,255,0.03)',
|
| 71 |
+
border: `1px solid ${hovered ? accent : 'rgba(255,255,255,0.08)'}`,
|
| 72 |
+
borderRadius: '12px',
|
| 73 |
+
padding: '1rem',
|
| 74 |
+
cursor: 'pointer',
|
| 75 |
+
transition: 'border-color 0.2s',
|
| 76 |
+
boxShadow: hovered ? `0 4px 20px ${accent}30` : 'none',
|
| 77 |
+
}}
|
| 78 |
+
>
|
| 79 |
+
<div style={{ display: 'flex', alignItems: 'center', gap: '0.75rem', marginBottom: '0.6rem' }}>
|
| 80 |
+
<img
|
| 81 |
+
src={profile.avatar_url || `https://ui-avatars.com/api/?name=${encodeURIComponent(profile.full_name || 'User')}&background=random&size=48`}
|
| 82 |
+
alt={profile.full_name}
|
| 83 |
+
style={{ width: 40, height: 40, borderRadius: '50%', objectFit: 'cover', border: `2px solid ${accent}55` }}
|
| 84 |
+
/>
|
| 85 |
+
<div>
|
| 86 |
+
<p style={{ fontWeight: '700', color: '#fff', fontSize: '0.9rem', marginBottom: 2 }}>{profile.full_name || 'Unknown'}</p>
|
| 87 |
+
<p style={{ fontSize: '0.75rem', color: '#94a3b8' }}>{profile.headline || profile.role || '—'}</p>
|
| 88 |
+
</div>
|
| 89 |
+
</div>
|
| 90 |
+
|
| 91 |
+
<p style={{ fontSize: '0.75rem', color: '#64748b', marginBottom: '0.5rem' }}>
|
| 92 |
+
{profile.experience_years ? `${profile.experience_years} yrs exp` : 'No experience listed'}
|
| 93 |
+
</p>
|
| 94 |
+
|
| 95 |
+
{skills.length > 0 && (
|
| 96 |
+
<div style={{ display: 'flex', flexWrap: 'wrap', gap: '0.3rem' }}>
|
| 97 |
+
{skills.map((s, i) => (
|
| 98 |
+
<span key={i} style={{
|
| 99 |
+
fontSize: '0.7rem', padding: '2px 8px', borderRadius: '4px',
|
| 100 |
+
backgroundColor: `${accent}20`, color: accent,
|
| 101 |
+
border: `1px solid ${accent}40`
|
| 102 |
+
}}>{s}</span>
|
| 103 |
+
))}
|
| 104 |
+
</div>
|
| 105 |
+
)}
|
| 106 |
+
</motion.div>
|
| 107 |
+
);
|
| 108 |
+
};
|
| 109 |
+
|
| 110 |
+
// ─── Cluster Card ─────────────────────────────────────────────────────────────
|
| 111 |
+
const ClusterCard = ({ label, profiles, colorIdx, searchQuery, onViewProfile }) => {
|
| 112 |
+
const [expanded, setExpanded] = useState(true);
|
| 113 |
+
const color = getColor(colorIdx);
|
| 114 |
+
|
| 115 |
+
const filtered = profiles.filter(p => {
|
| 116 |
+
const q = searchQuery.toLowerCase();
|
| 117 |
+
return (
|
| 118 |
+
(p.full_name || '').toLowerCase().includes(q) ||
|
| 119 |
+
(p.headline || '').toLowerCase().includes(q) ||
|
| 120 |
+
(p.role || '').toLowerCase().includes(q)
|
| 121 |
+
);
|
| 122 |
+
});
|
| 123 |
+
|
| 124 |
+
if (searchQuery && filtered.length === 0) return null;
|
| 125 |
+
|
| 126 |
+
return (
|
| 127 |
+
<motion.div
|
| 128 |
+
initial={{ opacity: 0, y: 20 }}
|
| 129 |
+
animate={{ opacity: 1, y: 0 }}
|
| 130 |
+
style={{
|
| 131 |
+
backgroundColor: color.glow,
|
| 132 |
+
border: `1px solid ${color.border}`,
|
| 133 |
+
borderRadius: '16px',
|
| 134 |
+
overflow: 'hidden',
|
| 135 |
+
marginBottom: '1.5rem',
|
| 136 |
+
}}
|
| 137 |
+
>
|
| 138 |
+
{/* Header */}
|
| 139 |
+
<button
|
| 140 |
+
onClick={() => setExpanded(e => !e)}
|
| 141 |
+
style={{
|
| 142 |
+
width: '100%', background: 'none', border: 'none', cursor: 'pointer',
|
| 143 |
+
padding: '1.25rem 1.5rem',
|
| 144 |
+
display: 'flex', alignItems: 'center', justifyContent: 'space-between',
|
| 145 |
+
color: '#fff',
|
| 146 |
+
}}
|
| 147 |
+
>
|
| 148 |
+
<div style={{ display: 'flex', alignItems: 'center', gap: '0.75rem' }}>
|
| 149 |
+
<div style={{
|
| 150 |
+
width: 36, height: 36, borderRadius: '10px',
|
| 151 |
+
backgroundColor: `${color.accent}22`, display: 'flex', alignItems: 'center', justifyContent: 'center',
|
| 152 |
+
border: `1px solid ${color.accent}55`
|
| 153 |
+
}}>
|
| 154 |
+
<ClusterIcon style={{ color: color.accent }} />
|
| 155 |
+
</div>
|
| 156 |
+
<div style={{ textAlign: 'left' }}>
|
| 157 |
+
<h3 style={{ fontSize: '1.05rem', fontWeight: '700', color: '#fff', margin: 0 }}>{label}</h3>
|
| 158 |
+
<div style={{ display: 'flex', alignItems: 'center', gap: '4px', color: '#94a3b8', fontSize: '0.8rem', marginTop: 2 }}>
|
| 159 |
+
<UsersIcon />
|
| 160 |
+
<span>{filtered.length} {filtered.length === 1 ? 'profile' : 'profiles'}</span>
|
| 161 |
+
</div>
|
| 162 |
+
</div>
|
| 163 |
+
</div>
|
| 164 |
+
<div style={{ color: color.accent }}>
|
| 165 |
+
<ChevronDown open={expanded} />
|
| 166 |
+
</div>
|
| 167 |
+
</button>
|
| 168 |
+
|
| 169 |
+
{/* Body */}
|
| 170 |
+
<AnimatePresence initial={false}>
|
| 171 |
+
{expanded && (
|
| 172 |
+
<motion.div
|
| 173 |
+
key="body"
|
| 174 |
+
initial={{ height: 0, opacity: 0 }}
|
| 175 |
+
animate={{ height: 'auto', opacity: 1 }}
|
| 176 |
+
exit={{ height: 0, opacity: 0 }}
|
| 177 |
+
transition={{ duration: 0.3 }}
|
| 178 |
+
style={{ overflow: 'hidden' }}
|
| 179 |
+
>
|
| 180 |
+
<div style={{
|
| 181 |
+
padding: '0 1.5rem 1.5rem',
|
| 182 |
+
display: 'grid',
|
| 183 |
+
gridTemplateColumns: 'repeat(auto-fill, minmax(220px, 1fr))',
|
| 184 |
+
gap: '0.75rem'
|
| 185 |
+
}}>
|
| 186 |
+
{filtered.map(p => (
|
| 187 |
+
<ProfileCard
|
| 188 |
+
key={p.id}
|
| 189 |
+
profile={p}
|
| 190 |
+
accent={color.accent}
|
| 191 |
+
onView={onViewProfile}
|
| 192 |
+
/>
|
| 193 |
+
))}
|
| 194 |
+
</div>
|
| 195 |
+
</motion.div>
|
| 196 |
+
)}
|
| 197 |
+
</AnimatePresence>
|
| 198 |
+
</motion.div>
|
| 199 |
+
);
|
| 200 |
+
};
|
| 201 |
+
|
| 202 |
+
// ─── Profile Detail Modal ────────────────────��────────────────────────────────
|
| 203 |
+
const ProfileModal = ({ profile, onClose }) => {
|
| 204 |
+
if (!profile) return null;
|
| 205 |
+
const skills = Array.isArray(profile.technical_skills)
|
| 206 |
+
? profile.technical_skills
|
| 207 |
+
: typeof profile.technical_skills === 'string'
|
| 208 |
+
? profile.technical_skills.split(',').map(s => s.trim())
|
| 209 |
+
: [];
|
| 210 |
+
|
| 211 |
+
return (
|
| 212 |
+
<AnimatePresence>
|
| 213 |
+
<motion.div
|
| 214 |
+
initial={{ opacity: 0 }}
|
| 215 |
+
animate={{ opacity: 1 }}
|
| 216 |
+
exit={{ opacity: 0 }}
|
| 217 |
+
onClick={onClose}
|
| 218 |
+
style={{
|
| 219 |
+
position: 'fixed', inset: 0, backgroundColor: 'rgba(0,0,0,0.7)',
|
| 220 |
+
backdropFilter: 'blur(6px)', zIndex: 100, display: 'flex',
|
| 221 |
+
alignItems: 'center', justifyContent: 'center', padding: '1rem'
|
| 222 |
+
}}
|
| 223 |
+
>
|
| 224 |
+
<motion.div
|
| 225 |
+
initial={{ scale: 0.9, opacity: 0 }}
|
| 226 |
+
animate={{ scale: 1, opacity: 1 }}
|
| 227 |
+
exit={{ scale: 0.9, opacity: 0 }}
|
| 228 |
+
onClick={e => e.stopPropagation()}
|
| 229 |
+
style={{
|
| 230 |
+
backgroundColor: '#0f172a',
|
| 231 |
+
backgroundImage: `
|
| 232 |
+
radial-gradient(at 0% 0%, rgba(139,92,246,0.2) 0px, transparent 50%),
|
| 233 |
+
radial-gradient(at 100% 100%, rgba(239,68,68,0.2) 0px, transparent 50%)
|
| 234 |
+
`,
|
| 235 |
+
border: '1px solid rgba(255,255,255,0.1)',
|
| 236 |
+
borderRadius: '20px',
|
| 237 |
+
width: '100%', maxWidth: '540px',
|
| 238 |
+
maxHeight: '80vh', overflowY: 'auto',
|
| 239 |
+
boxShadow: '0 25px 50px rgba(0,0,0,0.5)',
|
| 240 |
+
padding: '2rem',
|
| 241 |
+
}}
|
| 242 |
+
>
|
| 243 |
+
{/* Close */}
|
| 244 |
+
<div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', marginBottom: '1.5rem' }}>
|
| 245 |
+
<div style={{ display: 'flex', alignItems: 'center', gap: '1rem' }}>
|
| 246 |
+
<img
|
| 247 |
+
src={profile.avatar_url || `https://ui-avatars.com/api/?name=${encodeURIComponent(profile.full_name || 'User')}&background=random&size=80`}
|
| 248 |
+
alt={profile.full_name}
|
| 249 |
+
style={{ width: 56, height: 56, borderRadius: '50%', objectFit: 'cover', border: '2px solid rgba(239,68,68,0.4)' }}
|
| 250 |
+
/>
|
| 251 |
+
<div>
|
| 252 |
+
<h2 style={{ fontSize: '1.4rem', fontWeight: '800', color: '#fff', margin: 0 }}>{profile.full_name}</h2>
|
| 253 |
+
<p style={{ color: '#94a3b8', fontSize: '0.85rem', margin: 0 }}>{profile.headline || profile.role || '—'}</p>
|
| 254 |
+
</div>
|
| 255 |
+
</div>
|
| 256 |
+
<button onClick={onClose} style={{ background: 'none', border: 'none', color: '#64748b', cursor: 'pointer' }}>
|
| 257 |
+
<XIcon />
|
| 258 |
+
</button>
|
| 259 |
+
</div>
|
| 260 |
+
|
| 261 |
+
{/* Stats Row */}
|
| 262 |
+
<div style={{ display: 'grid', gridTemplateColumns: 'repeat(3, 1fr)', gap: '0.75rem', marginBottom: '1.5rem' }}>
|
| 263 |
+
{[
|
| 264 |
+
{ label: 'Experience', value: profile.experience_years ? `${profile.experience_years} yrs` : '—' },
|
| 265 |
+
{ label: 'Cluster', value: profile.cluster_label || '—' },
|
| 266 |
+
{ label: 'Email', value: profile.email ? profile.email.split('@')[0] : '—' },
|
| 267 |
+
].map(({ label, value }) => (
|
| 268 |
+
<div key={label} style={{
|
| 269 |
+
backgroundColor: 'rgba(255,255,255,0.05)', borderRadius: '10px',
|
| 270 |
+
padding: '0.75rem', border: '1px solid rgba(255,255,255,0.08)'
|
| 271 |
+
}}>
|
| 272 |
+
<p style={{ fontSize: '0.7rem', color: '#64748b', textTransform: 'uppercase', letterSpacing: '0.05em', marginBottom: 4 }}>{label}</p>
|
| 273 |
+
<p style={{ fontSize: '0.85rem', fontWeight: '600', color: '#e2e8f0', wordBreak: 'break-all' }}>{value}</p>
|
| 274 |
+
</div>
|
| 275 |
+
))}
|
| 276 |
+
</div>
|
| 277 |
+
|
| 278 |
+
{/* Summary */}
|
| 279 |
+
{profile.summary && (
|
| 280 |
+
<div style={{ marginBottom: '1.5rem' }}>
|
| 281 |
+
<h4 style={{ fontSize: '0.85rem', color: '#94a3b8', fontWeight: '600', marginBottom: '0.5rem', textTransform: 'uppercase', letterSpacing: '0.05em' }}>Summary</h4>
|
| 282 |
+
<p style={{ fontSize: '0.9rem', lineHeight: '1.6', color: '#cbd5e1', backgroundColor: 'rgba(255,255,255,0.04)', padding: '0.75rem', borderRadius: '8px', border: '1px solid rgba(255,255,255,0.05)' }}>
|
| 283 |
+
{profile.summary}
|
| 284 |
+
</p>
|
| 285 |
+
</div>
|
| 286 |
+
)}
|
| 287 |
+
|
| 288 |
+
{/* Skills */}
|
| 289 |
+
{skills.length > 0 && (
|
| 290 |
+
<div style={{ marginBottom: '1.5rem' }}>
|
| 291 |
+
<h4 style={{ fontSize: '0.85rem', color: '#94a3b8', fontWeight: '600', marginBottom: '0.5rem', textTransform: 'uppercase', letterSpacing: '0.05em' }}>Technical Skills</h4>
|
| 292 |
+
<div style={{ display: 'flex', flexWrap: 'wrap', gap: '0.4rem' }}>
|
| 293 |
+
{skills.map((s, i) => (
|
| 294 |
+
<span key={i} style={{
|
| 295 |
+
fontSize: '0.8rem', padding: '4px 10px', borderRadius: '6px',
|
| 296 |
+
backgroundColor: 'rgba(239,68,68,0.1)', color: '#EF4444',
|
| 297 |
+
border: '1px solid rgba(239,68,68,0.2)'
|
| 298 |
+
}}>{s}</span>
|
| 299 |
+
))}
|
| 300 |
+
</div>
|
| 301 |
+
</div>
|
| 302 |
+
)}
|
| 303 |
+
|
| 304 |
+
{/* Education */}
|
| 305 |
+
{profile.education && (
|
| 306 |
+
<div>
|
| 307 |
+
<h4 style={{ fontSize: '0.85rem', color: '#94a3b8', fontWeight: '600', marginBottom: '0.5rem', textTransform: 'uppercase', letterSpacing: '0.05em' }}>Education</h4>
|
| 308 |
+
<p style={{ fontSize: '0.85rem', color: '#cbd5e1' }}>
|
| 309 |
+
{typeof profile.education === 'string' ? profile.education : JSON.stringify(profile.education)}
|
| 310 |
+
</p>
|
| 311 |
+
</div>
|
| 312 |
+
)}
|
| 313 |
+
</motion.div>
|
| 314 |
+
</motion.div>
|
| 315 |
+
</AnimatePresence>
|
| 316 |
+
);
|
| 317 |
+
};
|
| 318 |
+
|
| 319 |
+
// ─── MAIN PAGE ────────────────────────────────────────────────────────────────
|
| 320 |
+
export default function TalentClusters() {
|
| 321 |
+
const [clusters, setClusters] = useState({}); // { labelName: [profiles] }
|
| 322 |
+
const [isLoading, setIsLoading] = useState(true);
|
| 323 |
+
const [searchQuery, setSearchQuery] = useState('');
|
| 324 |
+
const [selectedProfile, setSelectedProfile] = useState(null);
|
| 325 |
+
const [error, setError] = useState(null);
|
| 326 |
+
|
| 327 |
+
useEffect(() => {
|
| 328 |
+
fetchClusters();
|
| 329 |
+
}, []);
|
| 330 |
+
|
| 331 |
+
const fetchClusters = async () => {
|
| 332 |
+
setIsLoading(true);
|
| 333 |
+
setError(null);
|
| 334 |
+
try {
|
| 335 |
+
const { data, error } = await supabase
|
| 336 |
+
.from('profiles')
|
| 337 |
+
.select('id, full_name, email, avatar_url, headline, role, experience_years, technical_skills, summary, education, cluster_label')
|
| 338 |
+
.not('cluster_label', 'is', null);
|
| 339 |
+
|
| 340 |
+
if (error) throw error;
|
| 341 |
+
|
| 342 |
+
// Group by cluster_label
|
| 343 |
+
const grouped = {};
|
| 344 |
+
data.forEach(profile => {
|
| 345 |
+
const label = profile.cluster_label || 'Uncategorized';
|
| 346 |
+
if (!grouped[label]) grouped[label] = [];
|
| 347 |
+
grouped[label].push(profile);
|
| 348 |
+
});
|
| 349 |
+
|
| 350 |
+
setClusters(grouped);
|
| 351 |
+
} catch (err) {
|
| 352 |
+
console.error('Failed to fetch clusters:', err);
|
| 353 |
+
setError('Failed to load talent clusters. Please try again.');
|
| 354 |
+
} finally {
|
| 355 |
+
setIsLoading(false);
|
| 356 |
+
}
|
| 357 |
+
};
|
| 358 |
+
|
| 359 |
+
const clusterEntries = Object.entries(clusters).sort((a, b) => b[1].length - a[1].length);
|
| 360 |
+
const totalProfiles = Object.values(clusters).reduce((s, arr) => s + arr.length, 0);
|
| 361 |
+
|
| 362 |
+
return (
|
| 363 |
+
<div style={{ paddingBottom: '4rem' }}>
|
| 364 |
+
<style>{`
|
| 365 |
+
.hide-scrollbar::-webkit-scrollbar { display: none; }
|
| 366 |
+
.hide-scrollbar { -ms-overflow-style: none; scrollbar-width: none; }
|
| 367 |
+
@keyframes spin { 100% { transform: rotate(360deg); } }
|
| 368 |
+
@keyframes pulse-dot { 0%,100% { opacity: 1; } 50% { opacity: 0.3; } }
|
| 369 |
+
`}</style>
|
| 370 |
+
|
| 371 |
+
{/* Header */}
|
| 372 |
+
<header style={{ marginBottom: '2rem' }}>
|
| 373 |
+
<div style={{ display: 'flex', alignItems: 'center', gap: '0.75rem', marginBottom: '0.5rem' }}>
|
| 374 |
+
<div style={{ color: '#EF4444' }}><ClusterIcon /></div>
|
| 375 |
+
<h1 style={{ fontSize: '1.875rem', fontWeight: 'bold', margin: 0 }}>Talent Clusters</h1>
|
| 376 |
+
</div>
|
| 377 |
+
<p style={{ color: '#64748b', fontSize: '0.9rem' }}>
|
| 378 |
+
AI-grouped candidate profiles based on skills and experience similarity.
|
| 379 |
+
</p>
|
| 380 |
+
</header>
|
| 381 |
+
|
| 382 |
+
{/* Stats Bar */}
|
| 383 |
+
<div style={{ display: 'flex', gap: '1rem', marginBottom: '2rem', flexWrap: 'wrap' }}>
|
| 384 |
+
{[
|
| 385 |
+
{ label: 'Total Clusters', value: clusterEntries.length, color: '#EF4444' },
|
| 386 |
+
{ label: 'Total Profiles', value: totalProfiles, color: '#8B5CF6' },
|
| 387 |
+
{ label: 'Avg. Cluster Size', value: clusterEntries.length ? Math.round(totalProfiles / clusterEntries.length) : 0, color: '#06B6D4' },
|
| 388 |
+
].map(({ label, value, color }) => (
|
| 389 |
+
<div key={label} style={{
|
| 390 |
+
flex: 1, minWidth: 140,
|
| 391 |
+
backgroundColor: 'rgba(255,255,255,0.03)',
|
| 392 |
+
border: '1px solid rgba(255,255,255,0.08)',
|
| 393 |
+
borderRadius: '12px', padding: '1rem 1.25rem',
|
| 394 |
+
}}>
|
| 395 |
+
<p style={{ fontSize: '0.75rem', color: '#64748b', textTransform: 'uppercase', letterSpacing: '0.05em', marginBottom: 4 }}>{label}</p>
|
| 396 |
+
<p style={{ fontSize: '1.8rem', fontWeight: '800', color, margin: 0, lineHeight: 1 }}>{isLoading ? '—' : value}</p>
|
| 397 |
+
</div>
|
| 398 |
+
))}
|
| 399 |
+
</div>
|
| 400 |
+
|
| 401 |
+
{/* Search + Refresh */}
|
| 402 |
+
<div style={{ display: 'flex', gap: '0.75rem', marginBottom: '2rem', alignItems: 'center' }}>
|
| 403 |
+
<div style={{ position: 'relative', flexGrow: 1 }}>
|
| 404 |
+
<div style={{ position: 'absolute', left: 12, top: '50%', transform: 'translateY(-50%)', color: '#64748b' }}>
|
| 405 |
+
<SearchIcon />
|
| 406 |
+
</div>
|
| 407 |
+
<input
|
| 408 |
+
type="text"
|
| 409 |
+
placeholder="Search by name, role, or headline..."
|
| 410 |
+
value={searchQuery}
|
| 411 |
+
onChange={e => setSearchQuery(e.target.value)}
|
| 412 |
+
style={{
|
| 413 |
+
width: '100%', padding: '0.75rem 0.75rem 0.75rem 2.25rem',
|
| 414 |
+
borderRadius: '0.5rem', border: '1px solid rgba(239,68,68,0.3)',
|
| 415 |
+
backgroundColor: 'rgba(255,255,255,0.04)', color: 'white',
|
| 416 |
+
fontSize: '0.9rem', outline: 'none', boxSizing: 'border-box'
|
| 417 |
+
}}
|
| 418 |
+
/>
|
| 419 |
+
</div>
|
| 420 |
+
<motion.button
|
| 421 |
+
onClick={fetchClusters}
|
| 422 |
+
whileHover={{ scale: 1.04 }}
|
| 423 |
+
whileTap={{ scale: 0.96 }}
|
| 424 |
+
style={{
|
| 425 |
+
backgroundColor: 'rgba(239,68,68,0.15)', border: '1px solid rgba(239,68,68,0.4)',
|
| 426 |
+
color: '#EF4444', padding: '0.75rem 1.25rem', borderRadius: '0.5rem',
|
| 427 |
+
cursor: 'pointer', fontWeight: '600', fontSize: '0.85rem', whiteSpace: 'nowrap'
|
| 428 |
+
}}
|
| 429 |
+
>
|
| 430 |
+
↻ Refresh
|
| 431 |
+
</motion.button>
|
| 432 |
+
</div>
|
| 433 |
+
|
| 434 |
+
{/* Content */}
|
| 435 |
+
{isLoading ? (
|
| 436 |
+
<div style={{ display: 'flex', flexDirection: 'column', alignItems: 'center', justifyContent: 'center', height: '300px', gap: '1rem' }}>
|
| 437 |
+
<div style={{
|
| 438 |
+
width: 40, height: 40, border: '3px solid rgba(239,68,68,0.2)',
|
| 439 |
+
borderTopColor: '#EF4444', borderRadius: '50%',
|
| 440 |
+
animation: 'spin 0.8s linear infinite'
|
| 441 |
+
}} />
|
| 442 |
+
<p style={{ color: '#64748b' }}>Loading talent clusters…</p>
|
| 443 |
+
</div>
|
| 444 |
+
) : error ? (
|
| 445 |
+
<div style={{ textAlign: 'center', padding: '3rem', color: '#EF4444' }}>
|
| 446 |
+
<p>{error}</p>
|
| 447 |
+
<button onClick={fetchClusters} style={{ marginTop: '1rem', backgroundColor: '#EF4444', color: 'white', border: 'none', padding: '0.5rem 1.5rem', borderRadius: '6px', cursor: 'pointer', fontWeight: '600' }}>
|
| 448 |
+
Retry
|
| 449 |
+
</button>
|
| 450 |
+
</div>
|
| 451 |
+
) : clusterEntries.length === 0 ? (
|
| 452 |
+
<div style={{ textAlign: 'center', padding: '4rem', color: '#64748b' }}>
|
| 453 |
+
<ClusterIcon />
|
| 454 |
+
<p style={{ marginTop: '1rem' }}>No clusters found. Run the clustering pipeline first.</p>
|
| 455 |
+
</div>
|
| 456 |
+
) : (
|
| 457 |
+
<>
|
| 458 |
+
{/* Cluster grid legend */}
|
| 459 |
+
<div style={{ display: 'flex', flexWrap: 'wrap', gap: '0.5rem', marginBottom: '1.5rem' }}>
|
| 460 |
+
{clusterEntries.map(([label, profiles], idx) => {
|
| 461 |
+
const color = getColor(idx);
|
| 462 |
+
return (
|
| 463 |
+
<span key={label} style={{
|
| 464 |
+
fontSize: '0.78rem', padding: '4px 12px', borderRadius: '99px',
|
| 465 |
+
backgroundColor: `${color.accent}18`, color: color.accent,
|
| 466 |
+
border: `1px solid ${color.accent}44`, fontWeight: '600'
|
| 467 |
+
}}>
|
| 468 |
+
{label} ({profiles.length})
|
| 469 |
+
</span>
|
| 470 |
+
);
|
| 471 |
+
})}
|
| 472 |
+
</div>
|
| 473 |
+
|
| 474 |
+
{/* Cluster cards */}
|
| 475 |
+
{clusterEntries.map(([label, profiles], idx) => (
|
| 476 |
+
<ClusterCard
|
| 477 |
+
key={label}
|
| 478 |
+
label={label}
|
| 479 |
+
profiles={profiles}
|
| 480 |
+
colorIdx={idx}
|
| 481 |
+
searchQuery={searchQuery}
|
| 482 |
+
onViewProfile={setSelectedProfile}
|
| 483 |
+
/>
|
| 484 |
+
))}
|
| 485 |
+
</>
|
| 486 |
+
)}
|
| 487 |
+
|
| 488 |
+
{/* Profile modal */}
|
| 489 |
+
<AnimatePresence>
|
| 490 |
+
{selectedProfile && (
|
| 491 |
+
<ProfileModal profile={selectedProfile} onClose={() => setSelectedProfile(null)} />
|
| 492 |
+
)}
|
| 493 |
+
</AnimatePresence>
|
| 494 |
+
</div>
|
| 495 |
+
);
|
| 496 |
+
}
|
src/components/JobListings.jsx
CHANGED
|
@@ -1,18 +1,18 @@
|
|
| 1 |
import React, { useState, useEffect } from 'react';
|
| 2 |
import { motion, AnimatePresence } from 'framer-motion';
|
| 3 |
-
import { supabase } from '../supabaseClient';
|
| 4 |
-
import { SearchIcon } from './Icons';
|
| 5 |
-
import JobDetail from './JobDetail';
|
| 6 |
-
import ApplyModel from './ApplyModel';
|
| 7 |
-
import JobCard from './JobCard';
|
| 8 |
import VerificationModal from './VerificationModal'; // ✅ Import the new modal
|
| 9 |
|
| 10 |
export default function JobListings({ searchQuery, setSearchQuery, isSearching, filteredJobListings }) {
|
| 11 |
-
|
| 12 |
const [selectedJob, setSelectedJob] = useState(null);
|
| 13 |
const [appliedJobIds, setAppliedJobIds] = useState(new Set());
|
| 14 |
-
const [applying, setApplying] = useState(null);
|
| 15 |
-
|
| 16 |
// State for the Apply Modal
|
| 17 |
const [jobToApply, setJobToApply] = useState(null);
|
| 18 |
|
|
@@ -28,7 +28,7 @@ export default function JobListings({ searchQuery, setSearchQuery, isSearching,
|
|
| 28 |
.from('applications')
|
| 29 |
.select('job_id')
|
| 30 |
.eq('user_id', user.id);
|
| 31 |
-
|
| 32 |
if (data) {
|
| 33 |
setAppliedJobIds(new Set(data.map(app => app.job_id)));
|
| 34 |
}
|
|
@@ -40,7 +40,7 @@ export default function JobListings({ searchQuery, setSearchQuery, isSearching,
|
|
| 40 |
// 2. Open Apply Modal
|
| 41 |
const initiateApply = (jobId) => {
|
| 42 |
const job = filteredJobListings.find(j => j.id === jobId);
|
| 43 |
-
if(job) {
|
| 44 |
setJobToApply(job);
|
| 45 |
}
|
| 46 |
};
|
|
@@ -48,12 +48,12 @@ export default function JobListings({ searchQuery, setSearchQuery, isSearching,
|
|
| 48 |
// 3. Submit Application (With Verification Gatekeeper)
|
| 49 |
const handleFinalSubmit = async (formData) => {
|
| 50 |
if (!jobToApply) return;
|
| 51 |
-
|
| 52 |
setApplying(jobToApply.id);
|
| 53 |
-
|
| 54 |
try {
|
| 55 |
const { data: { user } } = await supabase.auth.getUser();
|
| 56 |
-
|
| 57 |
if (!user) {
|
| 58 |
alert("Please log in to apply.");
|
| 59 |
return;
|
|
@@ -69,30 +69,30 @@ export default function JobListings({ searchQuery, setSearchQuery, isSearching,
|
|
| 69 |
if (profileError) throw profileError;
|
| 70 |
|
| 71 |
// If NOT verified, stop the application and show modal
|
| 72 |
-
if (!profile.is_phone_verified) {
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
|
| 79 |
// --- ✅ IF VERIFIED: Proceed with Application ---
|
| 80 |
const { error } = await supabase
|
| 81 |
.from('applications')
|
| 82 |
-
.insert([{
|
| 83 |
-
job_id: jobToApply.id,
|
| 84 |
user_id: user.id,
|
| 85 |
status: 'Pending',
|
| 86 |
-
resume_url: formData.resume_url,
|
| 87 |
-
cover_letter: formData.cover_letter
|
| 88 |
}]);
|
| 89 |
|
| 90 |
if (error) throw error;
|
| 91 |
|
| 92 |
setAppliedJobIds(prev => new Set(prev).add(jobToApply.id));
|
| 93 |
-
alert("Application submitted successfully!");
|
| 94 |
-
|
| 95 |
-
setJobToApply(null);
|
| 96 |
|
| 97 |
} catch (error) {
|
| 98 |
console.error("Error applying:", error.message);
|
|
@@ -144,16 +144,16 @@ export default function JobListings({ searchQuery, setSearchQuery, isSearching,
|
|
| 144 |
<input type="text" value={searchQuery} onChange={(e) => setSearchQuery(e.target.value)} placeholder="Search by job title..." style={{ width: '100%', padding: '0.75rem 1rem 0.75rem 2.5rem', borderRadius: '0.5rem', border: '1px solid rgba(251, 191, 36, 0.3)', backgroundColor: 'rgba(255,255,255,0.1)', color: 'white' }} />
|
| 145 |
</div>
|
| 146 |
</div>
|
| 147 |
-
|
| 148 |
{/* Job Grid */}
|
| 149 |
<motion.main layout style={{ display: 'grid', gridTemplateColumns: 'repeat(auto-fit, minmax(300px, 1fr))', gap: '2rem' }}>
|
| 150 |
<AnimatePresence>
|
| 151 |
{filteredJobListings.length > 0 ? (
|
| 152 |
filteredJobListings.map((job) => (
|
| 153 |
<motion.div key={job.id} layout initial={{ opacity: 0, scale: 0.8 }} animate={{ opacity: 1, scale: 1 }} exit={{ opacity: 0, scale: 0.8 }} transition={{ duration: 0.2 }}>
|
| 154 |
-
<JobCard
|
| 155 |
-
{...job}
|
| 156 |
-
onViewDetails={() => setSelectedJob(job)}
|
| 157 |
onApply={() => initiateApply(job.id)}
|
| 158 |
onWithdraw={() => handleWithdraw(job.id)}
|
| 159 |
isApplied={appliedJobIds.has(job.id)}
|
|
@@ -162,16 +162,16 @@ export default function JobListings({ searchQuery, setSearchQuery, isSearching,
|
|
| 162 |
</motion.div>
|
| 163 |
))
|
| 164 |
) : (
|
| 165 |
-
<motion.p initial={{opacity: 0}} animate={{opacity: 1}} style={{ color: '#d1d5db' }}>No jobs found.</motion.p>
|
| 166 |
)}
|
| 167 |
</AnimatePresence>
|
| 168 |
</motion.main>
|
| 169 |
|
| 170 |
{/* Job Detail Modal */}
|
| 171 |
{selectedJob && (
|
| 172 |
-
<JobDetail
|
| 173 |
-
job={selectedJob}
|
| 174 |
-
onClose={() => setSelectedJob(null)}
|
| 175 |
onApply={() => initiateApply(selectedJob.id)}
|
| 176 |
isApplied={appliedJobIds.has(selectedJob.id)}
|
| 177 |
isApplying={applying === selectedJob.id}
|
|
@@ -180,7 +180,7 @@ export default function JobListings({ searchQuery, setSearchQuery, isSearching,
|
|
| 180 |
|
| 181 |
{/* Apply Form Modal */}
|
| 182 |
{jobToApply && (
|
| 183 |
-
<ApplyModel
|
| 184 |
job={jobToApply}
|
| 185 |
isSubmitting={applying === jobToApply.id}
|
| 186 |
onClose={() => setJobToApply(null)}
|
|
@@ -190,7 +190,7 @@ export default function JobListings({ searchQuery, setSearchQuery, isSearching,
|
|
| 190 |
|
| 191 |
{/* ✅ OTP Verification Modal */}
|
| 192 |
{showVerificationModal && (
|
| 193 |
-
<VerificationModal
|
| 194 |
onClose={() => setShowVerificationModal(false)}
|
| 195 |
onVerified={() => {
|
| 196 |
setShowVerificationModal(false);
|
|
|
|
| 1 |
import React, { useState, useEffect } from 'react';
|
| 2 |
import { motion, AnimatePresence } from 'framer-motion';
|
| 3 |
+
import { supabase } from '../supabaseClient';
|
| 4 |
+
import { SearchIcon } from './Icons';
|
| 5 |
+
import JobDetail from './JobDetail';
|
| 6 |
+
import ApplyModel from './ApplyModel';
|
| 7 |
+
import JobCard from './JobCard';
|
| 8 |
import VerificationModal from './VerificationModal'; // ✅ Import the new modal
|
| 9 |
|
| 10 |
export default function JobListings({ searchQuery, setSearchQuery, isSearching, filteredJobListings }) {
|
| 11 |
+
|
| 12 |
const [selectedJob, setSelectedJob] = useState(null);
|
| 13 |
const [appliedJobIds, setAppliedJobIds] = useState(new Set());
|
| 14 |
+
const [applying, setApplying] = useState(null);
|
| 15 |
+
|
| 16 |
// State for the Apply Modal
|
| 17 |
const [jobToApply, setJobToApply] = useState(null);
|
| 18 |
|
|
|
|
| 28 |
.from('applications')
|
| 29 |
.select('job_id')
|
| 30 |
.eq('user_id', user.id);
|
| 31 |
+
|
| 32 |
if (data) {
|
| 33 |
setAppliedJobIds(new Set(data.map(app => app.job_id)));
|
| 34 |
}
|
|
|
|
| 40 |
// 2. Open Apply Modal
|
| 41 |
const initiateApply = (jobId) => {
|
| 42 |
const job = filteredJobListings.find(j => j.id === jobId);
|
| 43 |
+
if (job) {
|
| 44 |
setJobToApply(job);
|
| 45 |
}
|
| 46 |
};
|
|
|
|
| 48 |
// 3. Submit Application (With Verification Gatekeeper)
|
| 49 |
const handleFinalSubmit = async (formData) => {
|
| 50 |
if (!jobToApply) return;
|
| 51 |
+
|
| 52 |
setApplying(jobToApply.id);
|
| 53 |
+
|
| 54 |
try {
|
| 55 |
const { data: { user } } = await supabase.auth.getUser();
|
| 56 |
+
|
| 57 |
if (!user) {
|
| 58 |
alert("Please log in to apply.");
|
| 59 |
return;
|
|
|
|
| 69 |
if (profileError) throw profileError;
|
| 70 |
|
| 71 |
// If NOT verified, stop the application and show modal
|
| 72 |
+
/** if (!profile.is_phone_verified) {
|
| 73 |
+
setApplying(null); // Stop loading spinner
|
| 74 |
+
setJobToApply(null); // Close application form
|
| 75 |
+
setShowVerificationModal(true); // Open Verification Modal
|
| 76 |
+
return; // 🛑 Stop execution here
|
| 77 |
+
} **/
|
| 78 |
|
| 79 |
// --- ✅ IF VERIFIED: Proceed with Application ---
|
| 80 |
const { error } = await supabase
|
| 81 |
.from('applications')
|
| 82 |
+
.insert([{
|
| 83 |
+
job_id: jobToApply.id,
|
| 84 |
user_id: user.id,
|
| 85 |
status: 'Pending',
|
| 86 |
+
resume_url: formData.resume_url,
|
| 87 |
+
cover_letter: formData.cover_letter
|
| 88 |
}]);
|
| 89 |
|
| 90 |
if (error) throw error;
|
| 91 |
|
| 92 |
setAppliedJobIds(prev => new Set(prev).add(jobToApply.id));
|
| 93 |
+
alert("Application submitted successfully!");
|
| 94 |
+
|
| 95 |
+
setJobToApply(null);
|
| 96 |
|
| 97 |
} catch (error) {
|
| 98 |
console.error("Error applying:", error.message);
|
|
|
|
| 144 |
<input type="text" value={searchQuery} onChange={(e) => setSearchQuery(e.target.value)} placeholder="Search by job title..." style={{ width: '100%', padding: '0.75rem 1rem 0.75rem 2.5rem', borderRadius: '0.5rem', border: '1px solid rgba(251, 191, 36, 0.3)', backgroundColor: 'rgba(255,255,255,0.1)', color: 'white' }} />
|
| 145 |
</div>
|
| 146 |
</div>
|
| 147 |
+
|
| 148 |
{/* Job Grid */}
|
| 149 |
<motion.main layout style={{ display: 'grid', gridTemplateColumns: 'repeat(auto-fit, minmax(300px, 1fr))', gap: '2rem' }}>
|
| 150 |
<AnimatePresence>
|
| 151 |
{filteredJobListings.length > 0 ? (
|
| 152 |
filteredJobListings.map((job) => (
|
| 153 |
<motion.div key={job.id} layout initial={{ opacity: 0, scale: 0.8 }} animate={{ opacity: 1, scale: 1 }} exit={{ opacity: 0, scale: 0.8 }} transition={{ duration: 0.2 }}>
|
| 154 |
+
<JobCard
|
| 155 |
+
{...job}
|
| 156 |
+
onViewDetails={() => setSelectedJob(job)}
|
| 157 |
onApply={() => initiateApply(job.id)}
|
| 158 |
onWithdraw={() => handleWithdraw(job.id)}
|
| 159 |
isApplied={appliedJobIds.has(job.id)}
|
|
|
|
| 162 |
</motion.div>
|
| 163 |
))
|
| 164 |
) : (
|
| 165 |
+
<motion.p initial={{ opacity: 0 }} animate={{ opacity: 1 }} style={{ color: '#d1d5db' }}>No jobs found.</motion.p>
|
| 166 |
)}
|
| 167 |
</AnimatePresence>
|
| 168 |
</motion.main>
|
| 169 |
|
| 170 |
{/* Job Detail Modal */}
|
| 171 |
{selectedJob && (
|
| 172 |
+
<JobDetail
|
| 173 |
+
job={selectedJob}
|
| 174 |
+
onClose={() => setSelectedJob(null)}
|
| 175 |
onApply={() => initiateApply(selectedJob.id)}
|
| 176 |
isApplied={appliedJobIds.has(selectedJob.id)}
|
| 177 |
isApplying={applying === selectedJob.id}
|
|
|
|
| 180 |
|
| 181 |
{/* Apply Form Modal */}
|
| 182 |
{jobToApply && (
|
| 183 |
+
<ApplyModel
|
| 184 |
job={jobToApply}
|
| 185 |
isSubmitting={applying === jobToApply.id}
|
| 186 |
onClose={() => setJobToApply(null)}
|
|
|
|
| 190 |
|
| 191 |
{/* ✅ OTP Verification Modal */}
|
| 192 |
{showVerificationModal && (
|
| 193 |
+
<VerificationModal
|
| 194 |
onClose={() => setShowVerificationModal(false)}
|
| 195 |
onVerified={() => {
|
| 196 |
setShowVerificationModal(false);
|
src/pages/Admindashboard.jsx
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import React, { useState } from 'react';
|
| 2 |
import { motion, AnimatePresence } from 'framer-motion';
|
| 3 |
-
import { supabase } from '../supabaseClient';
|
| 4 |
|
| 5 |
// Import the new split modules
|
| 6 |
import AdminLayout from '../components/admin/AdminLayout';
|
|
@@ -9,6 +9,7 @@ import AdminSortingPage from '../components/admin/AdminSortingPage';
|
|
| 9 |
import AdminInterviewManagement from '../components/admin/AdminInterviewManagement';
|
| 10 |
import AdminProfile from '../components/admin/AdminProfile';
|
| 11 |
import JobPosting from './JobPosting'; // Import your existing JobPosting component
|
|
|
|
| 12 |
|
| 13 |
export default function AdminDashboard({ onNavigate }) {
|
| 14 |
const [activeTab, setActiveTab] = useState('dashboard');
|
|
@@ -18,23 +19,25 @@ export default function AdminDashboard({ onNavigate }) {
|
|
| 18 |
switch (activeTab) {
|
| 19 |
case 'dashboard':
|
| 20 |
return <AdminSummary onNavigate={onNavigate} setIsModalOpen={setIsModalOpen} />;
|
| 21 |
-
case 'jobs':
|
| 22 |
return <AdminSortingPage />;
|
| 23 |
-
case 'messages':
|
| 24 |
return <AdminInterviewManagement />;
|
| 25 |
-
case 'job-management':
|
| 26 |
return <JobPosting />;
|
| 27 |
-
case '
|
|
|
|
|
|
|
| 28 |
return <AdminProfile onNavigate={onNavigate} />;
|
| 29 |
-
default:
|
| 30 |
return null;
|
| 31 |
}
|
| 32 |
};
|
| 33 |
|
| 34 |
-
const contentVariants = {
|
| 35 |
-
hidden: { opacity: 0, y: 10 },
|
| 36 |
-
visible: { opacity: 1, y: 0 },
|
| 37 |
-
exit: { opacity: 0, y: -10 }
|
| 38 |
};
|
| 39 |
|
| 40 |
return (
|
|
|
|
| 1 |
import React, { useState } from 'react';
|
| 2 |
import { motion, AnimatePresence } from 'framer-motion';
|
| 3 |
+
import { supabase } from '../supabaseClient';
|
| 4 |
|
| 5 |
// Import the new split modules
|
| 6 |
import AdminLayout from '../components/admin/AdminLayout';
|
|
|
|
| 9 |
import AdminInterviewManagement from '../components/admin/AdminInterviewManagement';
|
| 10 |
import AdminProfile from '../components/admin/AdminProfile';
|
| 11 |
import JobPosting from './JobPosting'; // Import your existing JobPosting component
|
| 12 |
+
import TalentClusters from '../components/Admin/TalentClusters';
|
| 13 |
|
| 14 |
export default function AdminDashboard({ onNavigate }) {
|
| 15 |
const [activeTab, setActiveTab] = useState('dashboard');
|
|
|
|
| 19 |
switch (activeTab) {
|
| 20 |
case 'dashboard':
|
| 21 |
return <AdminSummary onNavigate={onNavigate} setIsModalOpen={setIsModalOpen} />;
|
| 22 |
+
case 'jobs':
|
| 23 |
return <AdminSortingPage />;
|
| 24 |
+
case 'messages':
|
| 25 |
return <AdminInterviewManagement />;
|
| 26 |
+
case 'job-management':
|
| 27 |
return <JobPosting />;
|
| 28 |
+
case 'clusters':
|
| 29 |
+
return <TalentClusters />;
|
| 30 |
+
case 'settings':
|
| 31 |
return <AdminProfile onNavigate={onNavigate} />;
|
| 32 |
+
default:
|
| 33 |
return null;
|
| 34 |
}
|
| 35 |
};
|
| 36 |
|
| 37 |
+
const contentVariants = {
|
| 38 |
+
hidden: { opacity: 0, y: 10 },
|
| 39 |
+
visible: { opacity: 1, y: 0 },
|
| 40 |
+
exit: { opacity: 0, y: -10 }
|
| 41 |
};
|
| 42 |
|
| 43 |
return (
|
src/pages/ApplicantProfile.jsx
CHANGED
|
@@ -25,7 +25,7 @@ export default function ApplicantProfile({ onNavigate }) {
|
|
| 25 |
try {
|
| 26 |
// Get current user
|
| 27 |
const { data: { user } } = await supabase.auth.getUser();
|
| 28 |
-
|
| 29 |
if (user) {
|
| 30 |
// Fetch Profile using maybeSingle() to avoid errors if empty
|
| 31 |
const { data: profile, error } = await supabase
|
|
@@ -44,7 +44,7 @@ export default function ApplicantProfile({ onNavigate }) {
|
|
| 44 |
setFormData(combinedData);
|
| 45 |
setOriginalFormData(combinedData);
|
| 46 |
if (profile.avatar_url) {
|
| 47 |
-
|
| 48 |
}
|
| 49 |
} else {
|
| 50 |
// New user - Initialize with just email
|
|
@@ -90,7 +90,7 @@ export default function ApplicantProfile({ onNavigate }) {
|
|
| 90 |
const newValue = type === 'checkbox' ? checked : value;
|
| 91 |
setFormData(prev => ({ ...prev, [name]: newValue }));
|
| 92 |
};
|
| 93 |
-
|
| 94 |
const handleAddExperience = () => {
|
| 95 |
const newExperience = { id: Date.now(), company: '', role: '', years: '' };
|
| 96 |
setFormData(prev => ({
|
|
@@ -98,7 +98,7 @@ export default function ApplicantProfile({ onNavigate }) {
|
|
| 98 |
work_experience: [...(prev.work_experience || []), newExperience]
|
| 99 |
}));
|
| 100 |
};
|
| 101 |
-
|
| 102 |
const handleExperienceChange = (index, e) => {
|
| 103 |
const { name, value } = e.target;
|
| 104 |
const updatedExperience = [...(formData.work_experience || [])];
|
|
@@ -110,7 +110,7 @@ export default function ApplicantProfile({ onNavigate }) {
|
|
| 110 |
if (!isEditing || !e.target.files || e.target.files.length === 0) return;
|
| 111 |
setResumeFile(e.target.files[0]);
|
| 112 |
};
|
| 113 |
-
|
| 114 |
const handleAvatarFileChange = (e) => {
|
| 115 |
if (!isEditing || !e.target.files || e.target.files.length === 0) return;
|
| 116 |
const file = e.target.files[0];
|
|
@@ -135,12 +135,23 @@ export default function ApplicantProfile({ onNavigate }) {
|
|
| 135 |
}
|
| 136 |
|
| 137 |
if (resumeFile) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
const filePath = `${user.id}/${Date.now()}_${resumeFile.name}`;
|
| 139 |
// Make sure your bucket is named 'resumes' (plural) or 'resume' (singular) to match your Supabase Storage
|
| 140 |
await supabase.storage.from('resume').upload(filePath, resumeFile, { upsert: true });
|
| 141 |
updates.resume_url = filePath;
|
| 142 |
}
|
| 143 |
-
|
| 144 |
const { error } = await supabase.from('profiles').upsert(updates);
|
| 145 |
if (error) throw error;
|
| 146 |
|
|
|
|
| 25 |
try {
|
| 26 |
// Get current user
|
| 27 |
const { data: { user } } = await supabase.auth.getUser();
|
| 28 |
+
|
| 29 |
if (user) {
|
| 30 |
// Fetch Profile using maybeSingle() to avoid errors if empty
|
| 31 |
const { data: profile, error } = await supabase
|
|
|
|
| 44 |
setFormData(combinedData);
|
| 45 |
setOriginalFormData(combinedData);
|
| 46 |
if (profile.avatar_url) {
|
| 47 |
+
setAvatarUrl(profile.avatar_url);
|
| 48 |
}
|
| 49 |
} else {
|
| 50 |
// New user - Initialize with just email
|
|
|
|
| 90 |
const newValue = type === 'checkbox' ? checked : value;
|
| 91 |
setFormData(prev => ({ ...prev, [name]: newValue }));
|
| 92 |
};
|
| 93 |
+
|
| 94 |
const handleAddExperience = () => {
|
| 95 |
const newExperience = { id: Date.now(), company: '', role: '', years: '' };
|
| 96 |
setFormData(prev => ({
|
|
|
|
| 98 |
work_experience: [...(prev.work_experience || []), newExperience]
|
| 99 |
}));
|
| 100 |
};
|
| 101 |
+
|
| 102 |
const handleExperienceChange = (index, e) => {
|
| 103 |
const { name, value } = e.target;
|
| 104 |
const updatedExperience = [...(formData.work_experience || [])];
|
|
|
|
| 110 |
if (!isEditing || !e.target.files || e.target.files.length === 0) return;
|
| 111 |
setResumeFile(e.target.files[0]);
|
| 112 |
};
|
| 113 |
+
|
| 114 |
const handleAvatarFileChange = (e) => {
|
| 115 |
if (!isEditing || !e.target.files || e.target.files.length === 0) return;
|
| 116 |
const file = e.target.files[0];
|
|
|
|
| 135 |
}
|
| 136 |
|
| 137 |
if (resumeFile) {
|
| 138 |
+
// Delete old resume if it exists to prevent duplication
|
| 139 |
+
if (originalFormData?.resume_url) {
|
| 140 |
+
try {
|
| 141 |
+
const oldPath = originalFormData.resume_url;
|
| 142 |
+
const { error: removeError } = await supabase.storage.from('resume').remove([oldPath]);
|
| 143 |
+
if (removeError) console.warn("Could not delete old resume:", removeError.message);
|
| 144 |
+
} catch (e) {
|
| 145 |
+
console.warn("Exception during old resume removal:", e);
|
| 146 |
+
}
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
const filePath = `${user.id}/${Date.now()}_${resumeFile.name}`;
|
| 150 |
// Make sure your bucket is named 'resumes' (plural) or 'resume' (singular) to match your Supabase Storage
|
| 151 |
await supabase.storage.from('resume').upload(filePath, resumeFile, { upsert: true });
|
| 152 |
updates.resume_url = filePath;
|
| 153 |
}
|
| 154 |
+
|
| 155 |
const { error } = await supabase.from('profiles').upsert(updates);
|
| 156 |
if (error) throw error;
|
| 157 |
|
system_architecture.txt
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# IRIS Detailed System Architecture
|
| 2 |
+
|
| 3 |
+
This document provides a comprehensive look at the IRIS architecture, broken down by functional layers and individual process steps.
|
| 4 |
+
|
| 5 |
+
## Overall System Flow
|
| 6 |
+
|
| 7 |
+
This tiered diagram shows how data flows through the three main layers of the system.
|
| 8 |
+
|
| 9 |
+
```mermaid
|
| 10 |
+
graph TD
|
| 11 |
+
subgraph "1. Ingestion & Preprocessing"
|
| 12 |
+
UC[User/Admin] -->|Upload| SS[Supabase Storage]
|
| 13 |
+
SS -->|Webhook| BE[FastAPI Backend]
|
| 14 |
+
BE -->|Download| PC[Text Cleaning]
|
| 15 |
+
PC -->|Anonymize| PA[PII Removal]
|
| 16 |
+
end
|
| 17 |
+
|
| 18 |
+
subgraph "2. NLP Processing Layer"
|
| 19 |
+
PA -->|Raw Text| EX[Gemini Extraction]
|
| 20 |
+
EX -->|JSON| DB[(Supabase DB)]
|
| 21 |
+
DB -->|Text Fields| EM[BGE-M3 Embedding]
|
| 22 |
+
EM -->|Vectors| DB
|
| 23 |
+
end
|
| 24 |
+
|
| 25 |
+
subgraph "3. Matching & AI Analysis"
|
| 26 |
+
DB -->|Job vs Resume| MS[Semantic Matching]
|
| 27 |
+
MS -->|Score| MG[Skill Gap Analysis]
|
| 28 |
+
MG -->|Insights| AI[Gemini Analysis]
|
| 29 |
+
AI -->|Final Report| UI[Admin Dashboard]
|
| 30 |
+
end
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
---
|
| 34 |
+
|
| 35 |
+
## 1. Data Ingestion & Preprocessing
|
| 36 |
+
This layer ensures that incoming data is clean, secure, and ready for AI processing.
|
| 37 |
+
|
| 38 |
+
* **File Upload**: Resumes and Job Descriptions are stored securely in Supabase buckets.
|
| 39 |
+
* **Event Trigger**: Database Webhooks instantly notify the backend when a new file arrives.
|
| 40 |
+
* **Text Cleaning**: Standardizes encoding, removes special characters, and handles whitespace.
|
| 41 |
+
* **PII Anonymization**: Uses Regex and NLP patterns to detect and protect sensitive personal information (phone, address) before deep processing.
|
| 42 |
+
|
| 43 |
+
## 2. NLP Processing Pipeline
|
| 44 |
+
The "Intelligence" layer that understands the meaning behind the text.
|
| 45 |
+
|
| 46 |
+
* **Structured Extraction**: Google Gemini parses unstructured text into logical objects (Skills, Experience, Education).
|
| 47 |
+
* **Relational Storage**: Structured data is saved into dedicated PostgreSQL tables for rapid querying.
|
| 48 |
+
* **Vector Embedding**: The BGE-M3 model creates "mathematical summaries" (vectors) of the candidate's profile and the job requirements.
|
| 49 |
+
* **Vector Search Index**: These vectors allow the system to find matches based on *meaning* rather than just keywords (e.g., matching "Software Engineer" with "Full Stack Developer").
|
| 50 |
+
|
| 51 |
+
## 3. Matching & AI Analysis Layer
|
| 52 |
+
The decision-making layer that provides final value to the recruiter.
|
| 53 |
+
|
| 54 |
+
* **Semantic Scoring**: Calculates the mathematical distance between a candidate's vector and a job's vector.
|
| 55 |
+
* **Skill Gap Analysis**: Compares the extracted skill sets to identify exactly what is missing or where the candidate excels.
|
| 56 |
+
* **AI Insight Generation**: A second pass with Gemini generates a human-readable summary, custom strengths, and potential weaknesses.
|
| 57 |
+
* **Final Ranking**: Aggregates all scores into a prioritized list for the Admin dashboard.
|
| 58 |
+
|
| 59 |
+
## Technology Stack
|
| 60 |
+
|
| 61 |
+
| Layer | Technologies |
|
| 62 |
+
| :--- | :--- |
|
| 63 |
+
| **Frontend** | React, Vite, Framer Motion, Lucide Icons |
|
| 64 |
+
| **Backend** | FastAPI, Python, SQLAlchemy/Supabase-py |
|
| 65 |
+
| **Data** | Supabase (Postgres), pgvector, Supabase Storage |
|
| 66 |
+
| **AI/ML** | Google Gemini (LLM), BGE-M3 (Embeddings), Sentence Transformers |
|