Saandraahh commited on
Commit
4b3a33f
·
1 Parent(s): 84d4394

Implemented clustering

Browse files
Files changed (45) hide show
  1. Supabase/.temp/cli-latest +1 -1
  2. Supabase/functions/otp/index.ts +8 -5
  3. backend/api.py +12 -4
  4. backend/check_clusters_after_run.py +23 -0
  5. backend/check_db_clustering.py +33 -0
  6. backend/check_job_data.py +26 -0
  7. backend/debug_profile.json +90 -0
  8. backend/debug_score.py +55 -0
  9. backend/docs/efficiency_guide.md +41 -0
  10. backend/final_verify.py +38 -0
  11. backend/fix_profile_embeddings_trigger.sql +56 -0
  12. backend/generate_realistic_resumes.py +183 -0
  13. backend/inspect_columns.py +30 -0
  14. backend/inspect_schema.py +28 -0
  15. backend/inspect_schema_fixed.py +34 -0
  16. backend/out_cmd.txt +20 -0
  17. backend/realistic_synthetic_resumes.json +0 -0
  18. backend/remove_triggers_for_profile_embeddings.sql +19 -0
  19. backend/repair_system_mismatches.sql +104 -0
  20. backend/requirements.txt +1 -0
  21. backend/script_output.txt +0 -0
  22. backend/src/embeddings/benchmark_bge.py +55 -0
  23. backend/src/embeddings/evaluate_quality.py +197 -0
  24. backend/src/embeddings/job_embed.py +1 -1
  25. backend/src/embeddings/match_benchmark_granular.py +228 -0
  26. backend/src/embeddings/profile_entities_bench.py +115 -0
  27. backend/src/matching/similarity.py +40 -17
  28. backend/src/services/clustering_service.py +148 -0
  29. backend/src/services/test_clustering.py +21 -0
  30. backend/src/services/verify_labels.py +41 -0
  31. backend/supabase_ingest.py +9 -4
  32. backend/test_ingest_output.txt +0 -0
  33. debug_log.txt +15 -0
  34. entity_benchmark_scaled_results.txt +12 -0
  35. experimental_results.tex +53 -0
  36. match_benchmark_results.json +22 -0
  37. matching_analysis_report.md +0 -0
  38. quality_metrics_adversarial.json +6 -0
  39. schema_dump.txt +22 -0
  40. src/components/Admin/AdminLayout.jsx +36 -34
  41. src/components/Admin/TalentClusters.jsx +496 -0
  42. src/components/JobListings.jsx +36 -36
  43. src/pages/Admindashboard.jsx +13 -10
  44. src/pages/ApplicantProfile.jsx +17 -6
  45. system_architecture.txt +66 -0
Supabase/.temp/cli-latest CHANGED
@@ -1 +1 @@
1
- v2.67.1
 
1
+ v2.75.0
Supabase/functions/otp/index.ts CHANGED
@@ -6,7 +6,7 @@ const corsHeaders = {
6
  'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type',
7
  };
8
 
9
- serve(async (req) => {
10
  if (req.method === 'OPTIONS') {
11
  return new Response('ok', { headers: corsHeaders });
12
  }
@@ -35,6 +35,7 @@ serve(async (req) => {
35
  // ACTION: SEND SMS (VIA TWILIO)
36
  // ==========================================
37
  if (action === 'send') {
 
38
  const { data: profile } = await supabaseAdmin
39
  .from('profiles')
40
  .select('phone')
@@ -88,10 +89,10 @@ serve(async (req) => {
88
  console.error("Twilio Error:", errorText);
89
  throw new Error("Failed to send SMS. Check server logs.");
90
  }
91
- // --- TWILIO LOGIC ENDS HERE ---
92
 
93
  return new Response(
94
- JSON.stringify({ message: "OTP sent successfully" }),
95
  { headers: { ...corsHeaders, 'Content-Type': 'application/json' }, status: 200 }
96
  );
97
  }
@@ -100,6 +101,7 @@ serve(async (req) => {
100
  // ACTION: VERIFY
101
  // ==========================================
102
  if (action === 'verify') {
 
103
  if (!userCode) throw new Error("Missing OTP code");
104
 
105
  const { data: profile } = await supabaseAdmin.from('profiles').select('phone').eq('id', user.id).single();
@@ -119,16 +121,17 @@ serve(async (req) => {
119
  // Success
120
  await supabaseAdmin.from('profiles').update({ is_phone_verified: true }).eq('id', user.id);
121
  await supabaseAdmin.from('otp_verifications').delete().eq('phone', phone);
 
122
 
123
  return new Response(
124
- JSON.stringify({ message: "Phone verified successfully!" }),
125
  { headers: { ...corsHeaders, 'Content-Type': 'application/json' }, status: 200 }
126
  );
127
  }
128
 
129
  return new Response(JSON.stringify({ error: "Invalid Action" }), { status: 400, headers: corsHeaders });
130
 
131
- } catch (error) {
132
  return new Response(
133
  JSON.stringify({ error: error.message }),
134
  { headers: { ...corsHeaders, 'Content-Type': 'application/json' }, status: 400 }
 
6
  'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type',
7
  };
8
 
9
+ serve(async (req: Request) => {
10
  if (req.method === 'OPTIONS') {
11
  return new Response('ok', { headers: corsHeaders });
12
  }
 
35
  // ACTION: SEND SMS (VIA TWILIO)
36
  // ==========================================
37
  if (action === 'send') {
38
+ /** // Logic commented out to disable phone verification
39
  const { data: profile } = await supabaseAdmin
40
  .from('profiles')
41
  .select('phone')
 
89
  console.error("Twilio Error:", errorText);
90
  throw new Error("Failed to send SMS. Check server logs.");
91
  }
92
+ **/
93
 
94
  return new Response(
95
+ JSON.stringify({ message: "OTP sent successfully (Verification disabled)" }),
96
  { headers: { ...corsHeaders, 'Content-Type': 'application/json' }, status: 200 }
97
  );
98
  }
 
101
  // ACTION: VERIFY
102
  // ==========================================
103
  if (action === 'verify') {
104
+ /** // Logic commented out to disable phone verification
105
  if (!userCode) throw new Error("Missing OTP code");
106
 
107
  const { data: profile } = await supabaseAdmin.from('profiles').select('phone').eq('id', user.id).single();
 
121
  // Success
122
  await supabaseAdmin.from('profiles').update({ is_phone_verified: true }).eq('id', user.id);
123
  await supabaseAdmin.from('otp_verifications').delete().eq('phone', phone);
124
+ **/
125
 
126
  return new Response(
127
+ JSON.stringify({ message: "Phone verified successfully! (Verification disabled)" }),
128
  { headers: { ...corsHeaders, 'Content-Type': 'application/json' }, status: 200 }
129
  );
130
  }
131
 
132
  return new Response(JSON.stringify({ error: "Invalid Action" }), { status: 400, headers: corsHeaders });
133
 
134
+ } catch (error: any) {
135
  return new Response(
136
  JSON.stringify({ error: error.message }),
137
  { headers: { ...corsHeaders, 'Content-Type': 'application/json' }, status: 400 }
backend/api.py CHANGED
@@ -262,18 +262,26 @@ async def perform_candidate_analysis(candidate_id: str, job_id: str, force_refre
262
 
263
  # 6. Persist to Database
264
  try:
 
265
  data_to_save = {
266
  "ai_summary": ai_insights.get("summary"),
267
  "ai_insights": {
268
  "weaknesses": ai_insights.get("weaknesses") or [],
269
  "missing_skills": missing,
270
- "score_breakdown": semantic_result.get("breakdown")
271
  },
272
- "AI_score": ai_insights.get("score") or 0,
273
- "semantic_score": semantic_result.get("total_score")
 
 
 
 
 
 
 
274
  }
275
  client.table("applications").update(data_to_save).eq("user_id", candidate_id).eq("job_id", job_id).execute()
276
- print(f"💾 Persisted AI analysis for candidate {candidate_id}")
277
  except Exception as db_err:
278
  print(f"⚠️ Failed to persist AI analysis: {db_err}")
279
 
 
262
 
263
  # 6. Persist to Database
264
  try:
265
+ breakdown = semantic_result.get("breakdown") or {}
266
  data_to_save = {
267
  "ai_summary": ai_insights.get("summary"),
268
  "ai_insights": {
269
  "weaknesses": ai_insights.get("weaknesses") or [],
270
  "missing_skills": missing,
271
+ "score_breakdown": breakdown
272
  },
273
+ "AI_score": int(ai_insights.get("score") or 0),
274
+ "match_score": int(semantic_result.get("total_score") or 0),
275
+ # Granular Scores mapping to table columns
276
+ "skills_match": int(breakdown.get("skills", 0)),
277
+ "technical_skills_match": int(breakdown.get("technical_skills", 0)),
278
+ "work_experience_match": int(breakdown.get("experience", 0)),
279
+ "education_match": int(breakdown.get("education", 0)),
280
+ "certifications_match": int(breakdown.get("certifications", 0)),
281
+ "project_match": int(breakdown.get("projects", 0))
282
  }
283
  client.table("applications").update(data_to_save).eq("user_id", candidate_id).eq("job_id", job_id).execute()
284
+ print(f"💾 Persisted AI analysis and granular scores for candidate {candidate_id}")
285
  except Exception as db_err:
286
  print(f"⚠️ Failed to persist AI analysis: {db_err}")
287
 
backend/check_clusters_after_run.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ from supabase import create_client
4
+ from dotenv import load_dotenv
5
+
6
+ load_dotenv()
7
+
8
+ SUPABASE_URL = os.environ.get("SUPABASE_URL")
9
+ SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
10
+ client = create_client(SUPABASE_URL, SUPABASE_KEY)
11
+
12
+ async def check_clusters():
13
+ res = client.table("profiles").select("id, cluster_label").limit(10).execute()
14
+ if not res.data:
15
+ print("No profiles found")
16
+ return
17
+
18
+ print("Sample Cluster Labels:")
19
+ for row in res.data:
20
+ print(f" - ID: {row['id']} | Label: {row['cluster_label']}")
21
+
22
+ if __name__ == "__main__":
23
+ asyncio.run(check_clusters())
backend/check_db_clustering.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from supabase import create_client, Client
3
+ from dotenv import load_dotenv
4
+
5
+ load_dotenv()
6
+
7
+ url = os.environ.get("SUPABASE_URL")
8
+ key = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
9
+ client: Client = create_client(url, key)
10
+
11
+ def check_clustering_status():
12
+ print("Checking profiles table for cluster labels...")
13
+ resp = client.table("profiles").select("id, cluster_label").limit(20).execute()
14
+ data = resp.data
15
+
16
+ if not data:
17
+ print("No profiles found.")
18
+ return
19
+
20
+ # Count how many have labels
21
+ labeled = [d for d in data if d.get("cluster_label")]
22
+ print(f"Sample size: {len(data)}")
23
+ print(f"Profiles with cluster_label: {len(labeled)}")
24
+
25
+ if labeled:
26
+ print("Sample labels:")
27
+ for d in labeled[:5]:
28
+ print(f" - {d['id']}: {d['cluster_label']}")
29
+ else:
30
+ print("No profiles have cluster labels in this sample.")
31
+
32
+ if __name__ == "__main__":
33
+ check_clustering_status()
backend/check_job_data.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ from supabase import create_client
4
+ from dotenv import load_dotenv
5
+
6
+ load_dotenv()
7
+
8
+ SUPABASE_URL = os.environ.get("SUPABASE_URL")
9
+ SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
10
+ client = create_client(SUPABASE_URL, SUPABASE_KEY)
11
+
12
+ async def check_job():
13
+ job_id = "45bcca29-4e12-45bf-97d4-0b77ff55472f"
14
+ res = client.table("job_embeddings").select("*").eq("job_id", job_id).execute()
15
+ if not res.data:
16
+ print("Job not found in job_embeddings")
17
+ return
18
+
19
+ data = res.data[0]
20
+ print(f"Data for Job {job_id}:")
21
+ for k, v in data.items():
22
+ if k in ['job_id', 'created_at', 'updated_at']: continue
23
+ print(f" - {k}: {'POPULATED' if v else 'NULL'}")
24
+
25
+ if __name__ == "__main__":
26
+ asyncio.run(check_job())
backend/debug_profile.json ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "id": "a29ba56a-0d5b-4bc9-9a15-e314f6447260",
3
+ "updated_at": "2026-01-30T06:38:28.185688+00:00",
4
+ "full_name": null,
5
+ "role": "applicant",
6
+ "company_id": null,
7
+ "avatar_url": null,
8
+ "resume_url": "a29ba56a-0d5b-4bc9-9a15-e314f6447260/1769755098632_resume_ey.pdf",
9
+ "location": null,
10
+ "headline": "Final-year Computer Science student hands-on experience Machine Learning. Skilled React, Python, Flask, Supabase (PostgreSQL). Built ATS-style resume screening tools stock price prediction apps.",
11
+ "summary": null,
12
+ "skills": [
13
+ "Artificial Intelligence",
14
+ "Machine Learning",
15
+ "Communication",
16
+ "Team Work",
17
+ "Problem Solving",
18
+ "Conflict resolution"
19
+ ],
20
+ "work_experience": [
21
+ {
22
+ "role": "AI/ML Intern",
23
+ "years": "June 2025",
24
+ "company": "ICT Academy Kerala",
25
+ "duration": "1 month",
26
+ "description": "Completed 1-month internship focused Artificial Intelligence Machine Learning. CreatedandassessedMLmodelsonreal-worlddatasets;improvedvalidationaccuracyafterfeatureengineering hyperparameter tuning."
27
+ },
28
+ {
29
+ "role": "Django Intern",
30
+ "years": "Sept 2023",
31
+ "company": "Neo Green Labs",
32
+ "duration": null,
33
+ "description": "Delivered Django API endpoints (CRUD) connected relational database. Supported REST API implementation production use cases."
34
+ }
35
+ ],
36
+ "education": [
37
+ {
38
+ "year": "Nov 2022 Ongoing",
39
+ "course": "B.Tech Computer Science",
40
+ "institution": "APJ Abdul Technological University"
41
+ },
42
+ {
43
+ "year": "Jun 2020 Mar 2022",
44
+ "course": "Higher Secondary Education",
45
+ "institution": "Carmel College Engineering Technology"
46
+ }
47
+ ],
48
+ "phone": "+91 8921173593",
49
+ "current_position": null,
50
+ "address": null,
51
+ "linkedin": null,
52
+ "github": null,
53
+ "portfolio": null,
54
+ "experience_years": null,
55
+ "certifications": "ICT Academy Kerala (2025), The Joy Of Computing Using Python (Elite Rank), NPTEL (2025)",
56
+ "technical_skills": "Python, Java, C, SQL, React, Flask, Supabase, PostgreSQL, Django, XGBoost, LSTM",
57
+ "languages": null,
58
+ "professional_references": null,
59
+ "desired_salary": null,
60
+ "industry_experience": null,
61
+ "career_goals": null,
62
+ "willing_to_relocate": false,
63
+ "available_remote": false,
64
+ "processed": true,
65
+ "file_hash": "58406de4a011cd48192fe9e8a8e93e0255263632344bec40629deb639b54e847",
66
+ "projects": [
67
+ {
68
+ "title": "CV Ordering And Numbering Application",
69
+ "description": "Implemented automated CV filtering, ranking, clustering using job-specific criteria skill similarity algorithms. Built role-based access control system administrators, recruiters, applicants ensure secure streamlined workflows. Added PDF Excel report generation, Systematized email notifications, ATS-compatible resume for- matting. Architected platform emphasis scalability, data privacy, user-centric design improve recruiter efficiency candidate experience.",
70
+ "technologies_used": [
71
+ "React",
72
+ "Vite",
73
+ "Supabase"
74
+ ]
75
+ },
76
+ {
77
+ "title": "Stock Price Prediction System",
78
+ "description": "Built React + Flask web app forecast stock prices using historical OHLCV data. TrainedandbenchmarkedXGBoostandLSTMmodels;servedpredictionsthroughRESTAPIsanddisplayed trends. Created responsive UI entering stock symbols comparing predicted vs. actual price trends; trained models Google Colab delivered Matplotlib plots Flask backend.",
79
+ "technologies_used": [
80
+ "React",
81
+ "Flask",
82
+ "Python"
83
+ ]
84
+ }
85
+ ],
86
+ "email": null,
87
+ "is_phone_verified": false,
88
+ "ai_score": 0,
89
+ "cluster_label": null
90
+ }
backend/debug_score.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ import json
4
+ from supabase import create_client
5
+ from dotenv import load_dotenv
6
+ from src.matching.similarity import calculate_granular_match_score
7
+
8
+ load_dotenv()
9
+
10
+ SUPABASE_URL = os.environ.get("SUPABASE_URL")
11
+ SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
12
+ client = create_client(SUPABASE_URL, SUPABASE_KEY)
13
+
14
+ async def run_test():
15
+ res = client.table("applications").select("user_id, job_id").limit(1).execute()
16
+ if not res.data:
17
+ print("No apps found")
18
+ return
19
+
20
+ c_id = res.data[0]["user_id"]
21
+ j_id = res.data[0]["job_id"]
22
+
23
+ # Raw fetch
24
+ p_emb = client.table("profile_embeddings").select("*").eq("id", c_id).execute().data[0]
25
+ j_emb = client.table("job_embeddings").select("*").eq("job_id", j_id).execute().data[0]
26
+
27
+ log = []
28
+ log.append(f"Testing {c_id} against {j_id}")
29
+
30
+ def get_len(v):
31
+ if v is None: return "None"
32
+ if isinstance(v, str):
33
+ try:
34
+ # Approximate len by comma count
35
+ return v.count(',') + 1
36
+ except: return "StringError"
37
+ return len(v)
38
+
39
+ log.append("\n--- Profile Lengths ---")
40
+ for k in ['skills', 'technical_skills', 'experience', 'certifications']:
41
+ log.append(f"{k}: {get_len(p_emb.get(k))}")
42
+
43
+ log.append("\n--- Job Lengths ---")
44
+ for k in ['skills', 'technical_skills', 'work_experience', 'certifications']:
45
+ log.append(f"{k}: {get_len(j_emb.get(k))}")
46
+
47
+ result = await calculate_granular_match_score(client, c_id, j_id)
48
+ log.append(f"\nResult: {json.dumps(result)}")
49
+
50
+ with open("debug_log.txt", "w") as f:
51
+ f.write("\n".join(log))
52
+ print("Logged to debug_log.txt")
53
+
54
+ if __name__ == "__main__":
55
+ asyncio.run(run_test())
backend/docs/efficiency_guide.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # BGE-M3 Efficiency Guide
2
+
3
+ This guide explains how to measure and optimize the efficiency of the BAAI/bge-m3 model used in the IRIS project.
4
+
5
+ ## 1. Key Metrics
6
+
7
+ ### Performance (Infrastructure)
8
+ - **Latency**: Time taken to generate an embedding for a single text. Critical for real-time search.
9
+ - **Throughput**: Number of documents processed per second. Important for batch processing (e.g., initial profile indexing).
10
+ - **VRAM/RAM Usage**: Memory footprint of the model. BGE-M3 is ~2.2GB in FP32.
11
+
12
+ ### Retrieval Quality (Accuracy)
13
+ - **Precision@K**: The proportion of relevant candidates in the top K results.
14
+ * *Example*: If you return 10 candidates and 3 are actually qualified, Precision@10 = 30%.
15
+ - **Recall@K** (Correlation to User's "callback"): The proportion of total relevant candidates that were successfully captured in the top K.
16
+ * *Example*: If there are 5 qualified candidates in the database and your search finds 4 of them in the top 10, Recall@10 = 80%.
17
+ - **MRR (Mean Reciprocal Rank)**: Evaluates how high the first relevant candidate is ranked.
18
+ * *Formula*: $1 / Rank$. If the best candidate is at position #1, score is 1.0. If at #2, score is 0.5.
19
+ - **NDCG (Normalized Discounted Cumulative Gain)**: Measures the overall quality of the ranking order, giving more weight to highly relevant results at the very top.
20
+
21
+ ## 2. BGE-M3 Specific Features
22
+
23
+ BGE-M3 is a "multi-function" model. You can measure efficiency across three modes:
24
+ 1. **Dense Retrieval**: Standard 1024d vectors. Fast and semantic.
25
+ 2. **Sparse Retrieval (Lexical)**: Similar to BM25 but learned. More efficient for exact keyword matching.
26
+ 3. **Multi-Vector (ColBERT style)**: Most accurate but highest storage and latency cost.
27
+
28
+ ## 3. Optimization Techniques
29
+
30
+ ### Precision Tuning
31
+ - **FP16**: Use `model.half()` if on GPU to double speed and halve memory with negligible accuracy loss.
32
+ - **Quantization**: Int8 or GGUF formats can reduce memory usage by 4x.
33
+
34
+ ### Batching
35
+ Using optimal batch sizes (e.g., 16-32) significantly improves throughput compared to single-sentence processing.
36
+
37
+ ## 4. Measuring Quality in IRIS
38
+ To measure quality, create a "Golden Dataset" of (Job Description, Relevant Profiles) and calculate Hit Rate:
39
+ 1. Fetch top 10 profiles for a job.
40
+ 2. Check if the "ideal" candidate is in that list.
41
+ 3. Average this over 50 test cases.
backend/final_verify.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ import json
4
+ from supabase import create_client
5
+ from dotenv import load_dotenv
6
+ from api import perform_candidate_analysis
7
+
8
+ load_dotenv()
9
+
10
+ SUPABASE_URL = os.environ.get("SUPABASE_URL")
11
+ SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
12
+ client = create_client(SUPABASE_URL, SUPABASE_KEY)
13
+
14
+ async def verify():
15
+ res = client.table("applications").select("user_id, job_id").limit(1).execute()
16
+ if not res.data:
17
+ print("No apps found")
18
+ return
19
+
20
+ c_id = res.data[0]["user_id"]
21
+ j_id = res.data[0]["job_id"]
22
+
23
+ print(f"Triggering fresh analysis for {c_id} / {j_id}")
24
+ await perform_candidate_analysis(c_id, j_id, force_refresh=True)
25
+
26
+ print("\nChecking resulting record in DB:")
27
+ final_res = client.table("applications") \
28
+ .select("match_score, skills_match, technical_skills_match, work_experience_match, education_match, certifications_match, project_match") \
29
+ .eq("user_id", c_id).eq("job_id", j_id) \
30
+ .execute()
31
+
32
+ if final_res.data:
33
+ print(json.dumps(final_res.data[0], indent=2))
34
+ else:
35
+ print("Record not found after update")
36
+
37
+ if __name__ == "__main__":
38
+ asyncio.run(verify())
backend/fix_profile_embeddings_trigger.sql ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -- fix_profile_embeddings_trigger.sql
2
+ -- Run this in your Supabase SQL Editor to fully resolve the "j_emb" error!
3
+
4
+ -- 1. Redefine the function used by the trigger that refreshes recommendations
5
+ -- The error "record j_emb has no field experience" was likely deeply cached in this logic
6
+ CREATE OR REPLACE FUNCTION public.trg_refresh_recommendations_for_user()
7
+ RETURNS trigger
8
+ LANGUAGE plpgsql
9
+ AS $function$
10
+ DECLARE
11
+ j_id uuid;
12
+ match_res json;
13
+ BEGIN
14
+ -- First clear out old recommendations for this user
15
+ DELETE FROM public.job_recommendations WHERE user_id = NEW.id;
16
+
17
+ -- Iterate through all existing job embeddings
18
+ FOR j_id IN SELECT job_id FROM public.job_embeddings LOOP
19
+
20
+ -- Call the fixed match_profile_job function
21
+ match_res := public.match_profile_job(NEW.id, j_id);
22
+
23
+ -- Only insert if there's an actual match > 0
24
+ IF (match_res->>'match_score')::int > 0 THEN
25
+ INSERT INTO public.job_recommendations (
26
+ user_id, job_id, match_score, skills_match, technical_skills_match,
27
+ work_experience_match, education_match, certifications_match, project_match
28
+ ) VALUES (
29
+ NEW.id, j_id,
30
+ (match_res->>'match_score')::int,
31
+ (match_res->>'skills_match')::int,
32
+ (match_res->>'technical_skills_match')::int,
33
+ (match_res->>'work_experience_match')::int,
34
+ (match_res->>'education_match')::int,
35
+ (match_res->>'certifications_match')::int,
36
+ (match_res->>'project_match')::int
37
+ );
38
+ END IF;
39
+
40
+ END LOOP;
41
+
42
+ RETURN NEW;
43
+ END;
44
+ $function$;
45
+
46
+ -- 2. Drop the redundant webhook trigger since you only need the recommendation refresh
47
+ -- Having both might cause race conditions or unnecessary webhooks
48
+ DROP TRIGGER IF EXISTS on_profile_embedding_upsert ON public.profile_embeddings;
49
+
50
+ -- 3. Ensure the embedding refresh trigger is properly attached
51
+ DROP TRIGGER IF EXISTS on_profile_embedding_change ON public.profile_embeddings;
52
+
53
+ CREATE TRIGGER on_profile_embedding_change
54
+ AFTER INSERT OR UPDATE ON public.profile_embeddings
55
+ FOR EACH ROW
56
+ EXECUTE FUNCTION trg_refresh_recommendations_for_user();
backend/generate_realistic_resumes.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import random
3
+ import uuid
4
+ import datetime
5
+ try:
6
+ from faker import Faker
7
+ except ImportError:
8
+ print("Faker not found. Please install it with: pip install Faker")
9
+ exit(1)
10
+
11
+ fake = Faker()
12
+
13
+ # ---------------------------------------------------------
14
+ # CONSTANTS & DICTIONARIES
15
+ # ---------------------------------------------------------
16
+
17
+ SOFT_SKILLS = [
18
+ "Communication", "Teamwork", "Adaptability", "Analytical Thinking", "Problem Solving",
19
+ "Leadership", "Time Management", "Critical Thinking", "Empathy", "Conflict Resolution",
20
+ "Creativity", "Attention to Detail", "Work Ethic", "Interpersonal Skills", "Emotional Intelligence"
21
+ ]
22
+
23
+ TECH_SKILLS = [
24
+ "Python", "Java", "C++", "C#", "JavaScript", "TypeScript", "React", "Angular", "Vue.js",
25
+ "Node.js", "Express", "Django", "Flask", "Spring Boot", "SQL", "PostgreSQL", "MySQL",
26
+ "MongoDB", "AWS", "Azure", "GCP", "Docker", "Kubernetes", "Git", "TensorFlow", "PyTorch",
27
+ "Pandas", "NumPy", "Scikit-learn", "HTML", "CSS", "Bash", "Linux"
28
+ ]
29
+
30
+ ROLES = [
31
+ "Software Engineer", "Frontend Developer", "Backend Developer", "Full Stack Developer",
32
+ "Data Scientist", "Machine Learning Engineer", "DevOps Engineer", "Cloud Architect",
33
+ "System Administrator", "Database Administrator", "QA Engineer", "Product Manager"
34
+ ]
35
+
36
+ DEGREES = [
37
+ "B.Tech in Computer Science and Engineering",
38
+ "B.S. in Computer Science",
39
+ "M.S. in Software Engineering",
40
+ "B.A. in Information Technology",
41
+ "M.Tech in Data Science",
42
+ "B.S. in Electrical Engineering",
43
+ "Bootcamp Graduate in Web Development"
44
+ ]
45
+
46
+ CERTIFICATIONS = [
47
+ "AWS Certified Solutions Architect", "Google Cloud Professional Data Engineer",
48
+ "Certified Kubernetes Administrator (CKA)", "Cisco Certified Network Associate (CCNA)",
49
+ "Microsoft Certified: Azure Administrator Associate", "CompTIA Security+",
50
+ "Deep Learning Specialization (Coursera)", "Oracle Certified Professional Java SE Programmer"
51
+ ]
52
+
53
+ # ---------------------------------------------------------
54
+ # GENERATION LOGIC
55
+ # ---------------------------------------------------------
56
+
57
+ def generate_education():
58
+ edu_list = []
59
+ # Always a bachelor/masters
60
+ year_start = random.randint(2015, 2022)
61
+ course = random.choice(DEGREES)
62
+ institution = fake.company() + " University"
63
+ year = f"{year_start} - {year_start + 4}"
64
+
65
+ edu_list.append({
66
+ "course": course,
67
+ "institution": institution,
68
+ "year": year
69
+ })
70
+
71
+ # Sometimes high school
72
+ if random.random() > 0.5:
73
+ edu_list.append({
74
+ "course": "Higher Secondary Education",
75
+ "institution": f"{fake.city()} High School",
76
+ "year": f"{year_start - 2} - {year_start}"
77
+ })
78
+
79
+ return edu_list
80
+
81
+ def generate_work_experience(role):
82
+ exp_list = []
83
+ num_jobs = random.randint(1, 3)
84
+ current_year = 2026
85
+
86
+ for _ in range(num_jobs):
87
+ start_year = current_year - random.randint(1, 3)
88
+ duration = f"{fake.month_name()[:3]} {start_year} - " + (f"{fake.month_name()[:3]} {current_year}" if current_year < 2026 else "Present")
89
+
90
+ # Descriptions with actual tech context
91
+ action = random.choice(["Developed", "Maintained", "Architected", "Optimized", "Spearheaded", "Collaborated on"])
92
+ project = random.choice(["a scalable microservices architecture", "a responsive web application", "a high-throughput data pipeline", "an internal dashboard", "a machine learning model"])
93
+ impact = random.choice(["reducing latency by 30%.", "increasing user engagement by 15%.", "saving $10k annually.", "improving deployment speed."])
94
+
95
+ description = f"{action} {project} {impact}. Worked within an Agile framework to deliver features on schedule."
96
+
97
+ exp_list.append({
98
+ "role": role if random.random() > 0.3 else random.choice(ROLES),
99
+ "company": fake.company(),
100
+ "years": duration,
101
+ "description": description
102
+ })
103
+ current_year = start_year - 1
104
+
105
+ return exp_list
106
+
107
+ def generate_projects(tech_pool):
108
+ proj_list = []
109
+ num_proj = random.randint(1, 3)
110
+
111
+ for _ in range(num_proj):
112
+ p_tech = random.sample(tech_pool, k=min(len(tech_pool), random.randint(2, 4)))
113
+ desc = f"Built a {fake.bs()} platform using {', '.join(p_tech)}. Implemented {fake.catch_phrase().lower()} to solve real-world industry challenges."
114
+
115
+ proj_list.append({
116
+ "tech_stack": p_tech,
117
+ "description": desc
118
+ })
119
+
120
+ return proj_list
121
+
122
+ def build_candidate():
123
+ user_id = str(uuid.uuid4())
124
+ role = random.choice(ROLES)
125
+
126
+ # 1. SOFT SKILLS (LIST)
127
+ cand_soft_skills = random.sample(SOFT_SKILLS, k=random.randint(3, 6))
128
+
129
+ # 2. TECH SKILLS (COMMA STRING LIKE IN DEBUG_PAYLOAD)
130
+ cand_tech_list = random.sample(TECH_SKILLS, k=random.randint(6, 12))
131
+ cand_tech_string = ", ".join(cand_tech_list)
132
+
133
+ # 3. CERTIFICATIONS (COMMA STRING)
134
+ cand_certs = ", ".join(random.sample(CERTIFICATIONS, k=random.randint(0, 2)))
135
+
136
+ # 4. EDUCATION
137
+ edu = generate_education()
138
+
139
+ # 5. EXPERIENCE
140
+ exp = generate_work_experience(role)
141
+
142
+ # 6. PROJECTS
143
+ proj = generate_projects(cand_tech_list)
144
+
145
+ # 7. SUMMARY
146
+ summary = f"{role} with {random.randint(1, 10)} years of experience. Proficient in {cand_tech_list[0]}, {cand_tech_list[1]}, and {cand_tech_list[2]}. Known for {cand_soft_skills[0].lower()} and {cand_soft_skills[1].lower()}. Dedicated to {fake.catch_phrase().lower()}."
147
+
148
+ payload = {
149
+ "id": user_id,
150
+ "resume_url": f"{user_id}/resume.pdf",
151
+ "file_hash": fake.sha256(),
152
+ "processed": True,
153
+ "updated_at": "now()",
154
+ "full_name": fake.name(),
155
+ "summary": summary,
156
+ "phone": fake.phone_number(),
157
+ "email": fake.email(),
158
+ "skills": cand_soft_skills, # Note: Soft skills as List
159
+ "technical_skills": cand_tech_string, # Note: Tech skills as string representation (matching actual IRIS DB ingest logic)
160
+ "education": edu,
161
+ "work_experience": exp,
162
+ "projects": proj,
163
+ "certifications": cand_certs if cand_certs else None
164
+ }
165
+
166
+ return payload
167
+
168
+
169
+ def generate_dataset(num_records=250):
170
+ print(f"🚀 Generating highly realistic dataset with {num_records} candidates...")
171
+ candidates = []
172
+ for _ in range(num_records):
173
+ candidates.append(build_candidate())
174
+
175
+ file_name = "realistic_synthetic_resumes.json"
176
+ with open(file_name, "w", encoding="utf-8") as f:
177
+ json.dump(candidates, f, indent=4)
178
+
179
+ print(f"✅ Successfully wrote {num_records} real-format JSON objects to '{file_name}'!")
180
+
181
+
182
+ if __name__ == "__main__":
183
+ generate_dataset(250)
backend/inspect_columns.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ from supabase import create_client
4
+ from dotenv import load_dotenv
5
+
6
+ load_dotenv()
7
+
8
+ SUPABASE_URL = os.environ.get("SUPABASE_URL")
9
+ SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
10
+ client = create_client(SUPABASE_URL, SUPABASE_KEY)
11
+
12
+ async def inspect():
13
+ print("--- Profile Embeddings Columns ---")
14
+ p_res = client.table("profile_embeddings").select("*").limit(1).execute()
15
+ if p_res.data:
16
+ for k in sorted(p_res.data[0].keys()):
17
+ print(f" - {k}")
18
+ else:
19
+ print("No profile embeddings found")
20
+
21
+ print("\n--- Job Embeddings Columns ---")
22
+ j_res = client.table("job_embeddings").select("*").limit(1).execute()
23
+ if j_res.data:
24
+ for k in sorted(j_res.data[0].keys()):
25
+ print(f" - {k}")
26
+ else:
27
+ print("No job embeddings found")
28
+
29
+ if __name__ == "__main__":
30
+ asyncio.run(inspect())
backend/inspect_schema.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ from supabase import create_client
4
+ from dotenv import load_dotenv
5
+
6
+ load_dotenv()
7
+
8
+ SUPABASE_URL = os.environ.get("SUPABASE_URL")
9
+ SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
10
+ client = create_client(SUPABASE_URL, SUPABASE_KEY)
11
+
12
+ async def inspect():
13
+ print("--- Profile Embeddings Sample ---")
14
+ p_res = client.table("profile_embeddings").select("*").limit(1).execute()
15
+ if p_res.data:
16
+ print(", ".join(p_res.data[0].keys()))
17
+ else:
18
+ print("No profile embeddings found")
19
+
20
+ print("\n--- Job Embeddings Sample ---")
21
+ j_res = client.table("job_embeddings").select("*").limit(1).execute()
22
+ if j_res.data:
23
+ print(", ".join(j_res.data[0].keys()))
24
+ else:
25
+ print("No job embeddings found")
26
+
27
+ if __name__ == "__main__":
28
+ asyncio.run(inspect())
backend/inspect_schema_fixed.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ import json
4
+ from supabase import create_client
5
+ from dotenv import load_dotenv
6
+
7
+ load_dotenv()
8
+
9
+ SUPABASE_URL = os.environ.get("SUPABASE_URL")
10
+ SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
11
+ client = create_client(SUPABASE_URL, SUPABASE_KEY)
12
+
13
+ async def inspect():
14
+ with open("schema_dump.txt", "w") as f:
15
+ f.write("--- Profile Embeddings ---\n")
16
+ p_res = client.table("profile_embeddings").select("*").limit(1).execute()
17
+ if p_res.data:
18
+ cols = sorted(p_res.data[0].keys())
19
+ for c in cols:
20
+ f.write(f"- {c}\n")
21
+ else:
22
+ f.write("No data\n")
23
+
24
+ f.write("\n--- Job Embeddings ---\n")
25
+ j_res = client.table("job_embeddings").select("*").limit(1).execute()
26
+ if j_res.data:
27
+ cols = sorted(j_res.data[0].keys())
28
+ for c in cols:
29
+ f.write(f"- {c}\n")
30
+ else:
31
+ f.write("No data\n")
32
+
33
+ if __name__ == "__main__":
34
+ asyncio.run(inspect())
backend/out_cmd.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Traceback (most recent call last):
2
+ File "C:\Users\sandr\IRIS2026\IRIS_FULL\backend\recalculate_scores.py", line 90, in <module>
3
+ asyncio.run(main())
4
+ ~~~~~~~~~~~^^^^^^^^
5
+ File "C:\Users\sandr\AppData\Local\Programs\Python\Python313\Lib\asyncio\runners.py", line 195, in run
6
+ return runner.run(main)
7
+ ~~~~~~~~~~^^^^^^
8
+ File "C:\Users\sandr\AppData\Local\Programs\Python\Python313\Lib\asyncio\runners.py", line 118, in run
9
+ return self._loop.run_until_complete(task)
10
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^
11
+ File "C:\Users\sandr\AppData\Local\Programs\Python\Python313\Lib\asyncio\base_events.py", line 725, in run_until_complete
12
+ return future.result()
13
+ ~~~~~~~~~~~~~^^
14
+ File "C:\Users\sandr\IRIS2026\IRIS_FULL\backend\recalculate_scores.py", line 23, in main
15
+ print("\U0001f50d Fetching all applications from Supabase...")
16
+ ~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
17
+ File "C:\Users\sandr\AppData\Local\Programs\Python\Python313\Lib\encodings\cp1252.py", line 19, in encode
18
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
19
+ ~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
20
+ UnicodeEncodeError: 'charmap' codec can't encode character '\U0001f50d' in position 0: character maps to <undefined>
backend/realistic_synthetic_resumes.json ADDED
The diff for this file is too large to render. See raw diff
 
backend/remove_triggers_for_profile_embeddings.sql ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -- remove_triggers_for_profile_embeddings.sql
2
+ -- Run this in your Supabase SQL Editor to completely disable the triggers
3
+ -- causing the "embedding generation failed" error.
4
+
5
+ -- 1. Drop the trigger that refreshes recommendations
6
+ DROP TRIGGER IF EXISTS on_profile_embedding_change ON public.profile_embeddings;
7
+
8
+ -- 2. Drop the redundant webhook trigger
9
+ DROP TRIGGER IF EXISTS on_profile_embedding_upsert ON public.profile_embeddings;
10
+
11
+ -- 3. Drop the function that refreshes recommendations
12
+ DROP FUNCTION IF EXISTS public.trg_refresh_recommendations_for_user CASCADE;
13
+
14
+ -- 4. Drop the function for the webhook trigger
15
+ DROP FUNCTION IF EXISTS public.trg_on_profile_embedding_update CASCADE;
16
+
17
+ -- Now the Python upsert:
18
+ -- client.table("profile_embeddings").upsert(payload).execute()
19
+ -- will run purely as a database insert without any hidden functions interrupting it.
backend/repair_system_mismatches.sql ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -- repair_system_mismatches.sql
2
+ -- Run this in Supabase SQL Editor to resolve the "j_emb" error and restore automatic matching.
3
+
4
+ -- 1. FIX THE MATCHING FUNCTION (The "j_emb" bug fix)
5
+ -- This function is used by triggers and the RPC.
6
+ -- We ENSURE it uses 'work_experience' for jobs and 'experience' for profiles.
7
+ CREATE OR REPLACE FUNCTION public.match_profile_job(p_id uuid, j_id uuid)
8
+ RETURNS json
9
+ LANGUAGE plpgsql
10
+ AS $function$
11
+ DECLARE
12
+ p_rec record;
13
+ j_rec record; -- Consistency check: Job record MUST use its real columns
14
+ s_sim float := 0; t_sim float := 0; exp_sim float := 0;
15
+ edu_sim float := 0; cert_sim float := 0; proj_sim float := 0;
16
+ s_score int := 0; t_score int := 0; e_score int := 0;
17
+ ed_score int := 0; c_score int := 0; p_score int := 0;
18
+ BEGIN
19
+ -- Fetch Profile Embeddings
20
+ SELECT * INTO p_rec FROM public.profile_embeddings WHERE id = p_id;
21
+ IF NOT FOUND THEN RETURN json_build_object('error', 'Profile embeddings not found'); END IF;
22
+
23
+ -- Fetch Job Embeddings
24
+ SELECT * INTO j_rec FROM public.job_embeddings WHERE job_id = j_id;
25
+ IF NOT FOUND THEN RETURN json_build_object('error', 'Job embeddings not found'); END IF;
26
+
27
+ -- Similarity with Cosine Distance (<=>)
28
+ IF p_rec.skills IS NOT NULL AND j_rec.skills IS NOT NULL THEN
29
+ s_sim := coalesce(nullif(1 - (p_rec.skills <=> j_rec.skills), 'NaN'), 0);
30
+ END IF;
31
+
32
+ IF p_rec.technical_skills IS NOT NULL AND j_rec.technical_skills IS NOT NULL THEN
33
+ t_sim := coalesce(nullif(1 - (p_rec.technical_skills <=> j_rec.technical_skills), 'NaN'), 0);
34
+ END IF;
35
+
36
+ -- FIX: Profile column is 'experience', Job column is 'work_experience'
37
+ IF p_rec.experience IS NOT NULL AND j_rec.work_experience IS NOT NULL THEN
38
+ exp_sim := coalesce(nullif(1 - (p_rec.experience <=> j_rec.work_experience), 'NaN'), 0);
39
+ END IF;
40
+
41
+ IF p_rec.education IS NOT NULL AND j_rec.education IS NOT NULL THEN
42
+ edu_sim := coalesce(nullif(1 - (p_rec.education <=> j_rec.education), 'NaN'), 0);
43
+ END IF;
44
+
45
+ IF p_rec.certifications IS NOT NULL THEN
46
+ cert_sim := coalesce(nullif(1 - (p_rec.certifications <=> coalesce(j_rec.technical_skills, j_rec.skills)), 'NaN'), 0); -- NOTE(review): job_embeddings also stores a 'certifications' vector (see job_embed.py in this commit); confirm that matching candidate certs against job technical_skills rather than j_rec.certifications is intentional
47
+ END IF;
48
+
49
+ IF p_rec.projects IS NOT NULL AND j_rec.technical_skills IS NOT NULL THEN
50
+ proj_sim := coalesce(nullif(1 - (p_rec.projects <=> j_rec.technical_skills), 'NaN'), 0);
51
+ END IF;
52
+
53
+ -- Scaling to 0-100
54
+ s_score := (greatest(0, least(1, s_sim)) * 100)::int;
55
+ t_score := (greatest(0, least(1, t_sim)) * 100)::int;
56
+ e_score := (greatest(0, least(1, exp_sim)) * 100)::int;
57
+ ed_score := (greatest(0, least(1, edu_sim)) * 100)::int;
58
+ c_score := (greatest(0, least(1, cert_sim)) * 100)::int;
59
+ p_score := (greatest(0, least(1, proj_sim)) * 100)::int;
60
+
61
+ RETURN json_build_object(
62
+ 'match_score', ((t_score * 0.35) + (e_score * 0.20) + (p_score * 0.15) + (s_score * 0.10) + (ed_score * 0.10) + (c_score * 0.10))::int,
63
+ 'skills_match', s_score,
64
+ 'technical_skills_match', t_score,
65
+ 'work_experience_match', e_score,
66
+ 'education_match', ed_score,
67
+ 'certifications_match', c_score,
68
+ 'project_match', p_score
69
+ );
70
+ END;
71
+ $function$;
72
+
73
+ -- 2. CREATE THE JOB RECOMMENDATIONS RPC (Ranked Jobs)
74
+ -- We drop it first because changing the return schema requires it in Postgres.
75
+ DROP FUNCTION IF EXISTS public.get_job_recommendations(uuid, int);
76
+
77
+ CREATE OR REPLACE FUNCTION public.get_job_recommendations(p_user_id uuid, p_limit int DEFAULT 10)
78
+ RETURNS json
79
+ LANGUAGE plpgsql
80
+ AS $function$
81
+ DECLARE
82
+ results_json JSON;
83
+ BEGIN
84
+ SELECT json_agg(r) INTO results_json
85
+ FROM (
86
+ SELECT
87
+ j.id,
88
+ j.title,
89
+ j.location,
90
+ j.job_type,
91
+ j.salary_range,
92
+ c.name as company_name,
93
+ c.logo_url as company_logo,
94
+ (match_profile_job(p_user_id, j.id)->>'match_score')::int as match_score
95
+ FROM public.jobs j
96
+ JOIN public.companies c ON j.company_id = c.id
97
+ WHERE j.status = 'Active'
98
+ ORDER BY match_score DESC
99
+ LIMIT p_limit
100
+ ) r;
101
+
102
+ RETURN coalesce(results_json, '[]'::json);
103
+ END;
104
+ $function$;
backend/requirements.txt CHANGED
@@ -26,3 +26,4 @@ fastapi>=0.109.0
26
  uvicorn>=0.27.0
27
  python-multipart>=0.0.9
28
  google-genai>=0.2.0
 
 
26
  uvicorn>=0.27.0
27
  python-multipart>=0.0.9
28
  google-genai>=0.2.0
29
+ scikit-learn>=1.3.0
backend/script_output.txt ADDED
File without changes
backend/src/embeddings/benchmark_bge.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import torch
3
+ import numpy as np
4
+ from sentence_transformers import SentenceTransformer
5
+ import psutil
6
+ import os
7
+
8
+ def benchmark_bge():
9
+ print("🚀 Starting BGE-M3 Efficiency Benchmark...")
10
+
11
+ device = "cuda" if torch.cuda.is_available() else "cpu"
12
+ print(f"💻 Device: {device}")
13
+
14
+ print("📥 Loading BAAI/bge-m3...")
15
+ start_load = time.time()
16
+ model = SentenceTransformer('BAAI/bge-m3', device=device)
17
+ print(f"⏱️ Load Time: {time.time() - start_load:.2f}s")
18
+
19
+ process = psutil.Process(os.getpid())
20
+ mem_info = process.memory_info()
21
+ print(f"📊 Memory Usage (RAM): {mem_info.rss / 1024 / 1024:.2f} MB")
22
+
23
+ sentences = [
24
+ "The quick brown fox jumps over the lazy dog.",
25
+ "Artificial intelligence is transforming the recruitment industry.",
26
+ "Candidate has 5 years of experience in Python and FastAPI.",
27
+ "Looking for a Senior Software Engineer with cloud expertise."
28
+ ] * 25 # 100 sentences
29
+
30
+ batch_sizes = [1, 4, 8, 16, 32]
31
+
32
+ print("\n--- Latency vs Batch Size ---")
33
+ print(f"{'Batch Size':<12} | {'Time (s)':<10} | {'Sec/Sent':<10} | {'Throughput (sent/s)':<20}")
34
+ print("-" * 65)
35
+
36
+ for bs in batch_sizes:
37
+ start_time = time.time()
38
+ # Warmup
39
+ model.encode(sentences[:bs], batch_size=bs, show_progress_bar=False)
40
+
41
+ # Actual benchmark
42
+ start_time = time.time()
43
+ model.encode(sentences, batch_size=bs, show_progress_bar=False)
44
+ end_time = time.time()
45
+
46
+ total_time = end_time - start_time
47
+ sec_per_sent = total_time / len(sentences)
48
+ throughput = len(sentences) / total_time
49
+
50
+ print(f"{bs:<12} | {total_time:<10.3f} | {sec_per_sent:<10.4f} | {throughput:<20.2f}")
51
+
52
+ print("\n✅ Benchmark Complete.")
53
+
54
+ if __name__ == "__main__":
55
+ benchmark_bge()
backend/src/embeddings/evaluate_quality.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import time
4
+ import json
5
+ import random
6
+ import numpy as np
7
+
8
+ # Set encoding for Windows terminals
9
+ if sys.platform == "win32":
10
+ import io
11
+ sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
12
+
13
+ # Add backend to path
14
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../..')))
15
+
16
+ from backend.src.embeddings.local_embedder import generate_embedding
17
+
18
+ def cosine_similarity(v1, v2):
19
+ return np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
20
+
21
+ def inject_noise(text, is_skill=False):
22
+ """Simulates real-world messy resumes with abbreviations, typos, and lowercasing."""
23
+ if random.random() < 0.3: # 30% chance to leave perfectly clean
24
+ return text
25
+
26
+ abbreviations = {
27
+ "Python": "Py", "PostgreSQL": "Postgres", "JavaScript": "JS",
28
+ "React": "ReactJS", "Machine Learning": "ML", "Amazon Web Services": "AWS",
29
+ "Kubernetes": "K8s", "TypeScript": "TS", "User Experience": "UX"
30
+ }
31
+
32
+ if is_skill and text in abbreviations and random.random() > 0.5:
33
+ return abbreviations[text]
34
+
35
+ # Randomly lowercase everything (common in lazy resumes)
36
+ if random.random() > 0.7:
37
+ text = text.lower()
38
+
39
+ return text
40
+
41
+ def generate_adversarial_dataset():
42
+ """Generates 200 candidates with intentional distractors and noise."""
43
+ print("Building N=200 Adversarial Candidate Pool...")
44
+
45
+ domains = [
46
+ ("Frontend_React", ["React", "JavaScript", "Tailwind", "CSS", "TypeScript"]),
47
+ ("Frontend_Angular", ["Angular", "JavaScript", "SCSS", "HTML", "TypeScript"]),
48
+ ("Backend_Python", ["Python", "FastAPI", "PostgreSQL", "Docker", "Linux"]),
49
+ ("Backend_Java", ["Java", "Spring Boot", "MySQL", "Kafka", "Kubernetes"]),
50
+ ("Data_Science", ["Python", "Pandas", "PyTorch", "SQL", "Machine Learning"]),
51
+ ("Data_Engineer", ["Spark", "Airflow", "Python", "SQL", "AWS"]),
52
+ ("DevOps", ["Kubernetes", "Docker", "Terraform", "CI/CD", "AWS"]),
53
+ ("Mobile_iOS", ["Swift", "Objective-C", "iOS", "XCode", "CoreData"]),
54
+ ("Mobile_Android", ["Kotlin", "Java", "Android Studio", "Jetpack", "Firebase"]),
55
+ ("Cybersecurity", ["Network Security", "Penetration Testing", "Firewalls", "Linux", "Python"])
56
+ ]
57
+ levels = ["Junior", "Mid-Level", "Senior", "Lead"]
58
+
59
+ candidates = []
60
+ golden_dataset = []
61
+
62
+ cand_counter = 1
63
+
64
+ # Generate 40 Queries (10 domains x 4 levels)
65
+ for domain_name, base_skills in domains:
66
+ for level in levels:
67
+ # 1. The Target Candidate (Golden)
68
+ target_id = f"cand_{cand_counter}_TARGET_{level}_{domain_name}"
69
+ target_skills = [inject_noise(s, True) for s in base_skills]
70
+ candidates.append({
71
+ "id": target_id,
72
+ "headline": f"{level} {domain_name.replace('_', ' ')} Engineer",
73
+ "summary": inject_noise(f"Experienced {level} professional in {domain_name}. Passionate about building scalable architectures."),
74
+ "skills": target_skills,
75
+ "experience": [inject_noise(f"Built systems using {target_skills[0]} and {target_skills[1]}.")]
76
+ })
77
+ cand_counter += 1
78
+
79
+ # The Query (Clean, formal HR language)
80
+ query = f"Hiring a {level} professional in {domain_name.replace('_', ' ')}. Must have strong experience with {base_skills[0]}, {base_skills[1]}, and {base_skills[2]}."
81
+ golden_dataset.append({"query": query, "relevant_id": target_id})
82
+
83
+ # 2. Seniority Distractor (Wrong level, perfect skills)
84
+ distractor_level = "Senior" if level == "Junior" else "Junior"
85
+ candidates.append({
86
+ "id": f"cand_{cand_counter}_DISTRACTOR_LEVEL_{domain_name}",
87
+ "headline": f"{distractor_level} {domain_name.replace('_', ' ')} Engineer",
88
+ "summary": f"A {distractor_level} developer specializing in {domain_name}.",
89
+ "skills": base_skills, # Same exact skills to confuse the model
90
+ "experience": [f"Worked extensively with {base_skills[0]}."]
91
+ })
92
+ cand_counter += 1
93
+
94
+ # 3. Skill Distractor (Right level, missing core skill, has similar skill)
95
+ altered_skills = base_skills.copy()
96
+ altered_skills[0] = "C++" # Replace core skill with something irrelevant
97
+ candidates.append({
98
+ "id": f"cand_{cand_counter}_DISTRACTOR_SKILL_{domain_name}",
99
+ "headline": f"{level} Software Engineer",
100
+ "summary": f"Focuses on {altered_skills[0]} and backend architecture.",
101
+ "skills": altered_skills,
102
+ "experience": [f"Maintained legacy {altered_skills[0]} codebases."]
103
+ })
104
+ cand_counter += 1
105
+
106
+ # 4 & 5. Random Noise Candidates (Fill out the 200)
107
+ for _ in range(2):
108
+ rand_domain = random.choice(domains)
109
+ candidates.append({
110
+ "id": f"cand_{cand_counter}_RANDOM",
111
+ "headline": f"{random.choice(levels)} {rand_domain[0]} Dev",
112
+ "summary": "Looking for new opportunities. Hobbies: hiking, dog walking, photography.",
113
+ "skills": [inject_noise(s, True) for s in rand_domain[1]],
114
+ "experience": ["General software development tasks."]
115
+ })
116
+ cand_counter += 1
117
+
118
+ return candidates, golden_dataset
119
+
120
+ def evaluate_adversarial():
121
+ print("🚀 Starting Adversarial Robustness Evaluation...")
122
+
123
+ candidates, golden_dataset = generate_adversarial_dataset()
124
+
125
+ print(f"📊 Dataset: {len(golden_dataset)} Queries | {len(candidates)} Candidates")
126
+ print("⚠️ Warning: Embedding 200 profiles on CPU will take time. Please wait...\n")
127
+
128
+ # 1. Embed Candidates (Flattening)
129
+ candidate_embeddings = []
130
+ start_time = time.time()
131
+
132
+ for i, c in enumerate(candidates):
133
+ rich_text = f"Headline: {c['headline']}. Summary: {c['summary']} Skills: {', '.join(c['skills'])}. Experience: {' '.join(c['experience'])}"
134
+ candidate_embeddings.append({
135
+ "id": c["id"],
136
+ "vec": generate_embedding(rich_text)
137
+ })
138
+ if (i+1) % 20 == 0:
139
+ print(f" -> Embedded {i+1}/200 candidates...")
140
+
141
+ print(f"✅ Embedding complete in {time.time() - start_time:.2f} seconds.\n")
142
+
143
+ # 2. Evaluate Queries
144
+ mrr_total = 0
145
+ hits_at_1 = 0
146
+ hits_at_3 = 0
147
+ hits_at_5 = 0
148
+
149
+ for item in golden_dataset:
150
+ query_vec = generate_embedding(item["query"])
151
+ target_id = item["relevant_id"]
152
+
153
+ scores = [(c_emb["id"], cosine_similarity(query_vec, c_emb["vec"])) for c_emb in candidate_embeddings]
154
+ scores.sort(key=lambda x: x[1], reverse=True)
155
+
156
+ rank = -1
157
+ for idx, (cid, sim) in enumerate(scores):
158
+ if cid == target_id:
159
+ rank = idx + 1
160
+ break
161
+
162
+ if rank != -1:
163
+ mrr_total += (1.0 / rank)
164
+ if rank == 1: hits_at_1 += 1
165
+ if rank <= 3: hits_at_3 += 1
166
+ if rank <= 5: hits_at_5 += 1
167
+
168
+ # 3. Final Aggregation
169
+ num_queries = len(golden_dataset)
170
+ final_mrr = mrr_total / num_queries
171
+ recall_1 = hits_at_1 / num_queries
172
+ recall_3 = hits_at_3 / num_queries
173
+ recall_5 = hits_at_5 / num_queries
174
+
175
+ print("="*45)
176
+ print("🛡️ ADVERSARIAL RETRIEVAL METRICS (N=200)")
177
+ print("="*45)
178
+ print(f"MRR (Mean Reciprocal Rank): {final_mrr:.4f}")
179
+ print("-" * 45)
180
+ print(f"Recall@1 (R@1): {recall_1*100:.1f}%")
181
+ print(f"Recall@3 (R@3): {recall_3*100:.1f}%")
182
+ print(f"Recall@5 (R@5): {recall_5*100:.1f}%")
183
+ print("="*45)
184
+
185
+ # Save to JSON for the guide/paper
186
+ with open("quality_metrics_adversarial.json", "w") as f:
187
+ json.dump({
188
+ "dataset": "N=200 Adversarial (Noise + Distractors)",
189
+ "mrr": final_mrr,
190
+ "recall_1": recall_1,
191
+ "recall_3": recall_3
192
+ }, f, indent=4)
193
+
194
+ print("📄 Results securely saved to 'quality_metrics_adversarial.json'")
195
+
196
+ if __name__ == "__main__":
197
+ evaluate_adversarial()
backend/src/embeddings/job_embed.py CHANGED
@@ -92,7 +92,7 @@ def safe_generate_and_store_job_embeddings(client, job_id: str) -> None:
92
  "skills": generate_list_embedding(skills),
93
  "technical_skills": generate_list_embedding(technical_skills),
94
  "tools": generate_list_embedding(tools),
95
- "experience": generate_embedding(experience),
96
  "education": generate_embedding(education),
97
  "certifications": generate_list_embedding(certifications),
98
  "updated_at": "now()"
 
92
  "skills": generate_list_embedding(skills),
93
  "technical_skills": generate_list_embedding(technical_skills),
94
  "tools": generate_list_embedding(tools),
95
+ "work_experience": generate_embedding(experience),
96
  "education": generate_embedding(education),
97
  "certifications": generate_list_embedding(certifications),
98
  "updated_at": "now()"
backend/src/embeddings/match_benchmark_granular.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import time
4
+ import json
5
+ import random
6
+ import numpy as np
7
+ import torch
8
+ from sentence_transformers import SentenceTransformer
9
+
10
+ # Set encoding for Windows terminals
11
+ # NOTE: the io.TextIOWrapper re-wrap of sys.stdout is intentionally disabled below; it breaks when stdout is redirected to a log file during background runs
12
+ # if sys.platform == "win32":
13
+ # import io
14
+ # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
15
+
16
+ # Add backend to path
17
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../..')))
18
+
19
+ # ---------------------------------------------------------------------
20
+ # UTILS & NOISE SIMULATION
21
+ # ---------------------------------------------------------------------
22
+
23
+ def cosine_similarity(v1, v2):
24
+ if v1 is None or v2 is None: return 0.0
25
+ norm1 = np.linalg.norm(v1)
26
+ norm2 = np.linalg.norm(v2)
27
+ if norm1 == 0 or norm2 == 0: return 0.0
28
+ return np.dot(v1, v2) / (norm1 * norm2)
29
+
30
+ def jaccard_similarity(list1, list2):
31
+ s1 = set([str(x).lower().strip() for x in list1])
32
+ s2 = set([str(x).lower().strip() for x in list2])
33
+ if not s1 or not s2: return 0.0
34
+ return len(s1.intersection(s2)) / len(s1.union(s2))
35
+
36
+ def inject_real_world_noise(text, is_skill=False):
37
+ """Simulates typos, abbreviations, and informal language."""
38
+ if random.random() < 0.2: return text # 20% keep clean
39
+
40
+ abbrev = {
41
+ "Python": "Py", "PostgreSQL": "Postgres", "JavaScript": "JS",
42
+ "React": "ReactJS", "Machine Learning": "ML", "Kubernetes": "K8s",
43
+ "TypeScript": "TS", "Amazon Web Services": "AWS", "Google Cloud": "GCP"
44
+ }
45
+
46
+ # Apply abbreviation
47
+ if is_skill and text in abbrev and random.random() > 0.4:
48
+ return abbrev[text]
49
+
50
+ # Inject "Messy" Resume fillers
51
+ fillers = ["Highly skilled in", "Practical knowledge of", "Working with", "Extensive experience in"]
52
+ if random.random() > 0.7 and not is_skill:
53
+ text = f"{random.choice(fillers)} {text}"
54
+
55
+ # Random case noise
56
+ if random.random() > 0.8:
57
+ text = text.lower()
58
+
59
+ return text
60
+
61
+ # ---------------------------------------------------------------------
62
+ # DATASET GENERATION
63
+ # ---------------------------------------------------------------------
64
+
65
+ def generate_bench_dataset(num_candidates=100):
66
+ print(f"🛠️ Generating N={num_candidates} Real-World Synthetic Dataset...")
67
+
68
+ domains = [
69
+ ("Cloud_Architect", ["AWS", "Terraform", "Kubernetes", "Docker"], ["Solutions Associate", "AWS Architect"]),
70
+ ("Backend_Dev", ["Python", "FastAPI", "PostgreSQL", "Redis"], ["Python Cert", "FastAPI Expert"]),
71
+ ("Frontend_Dev", ["React", "TypeScript", "Tailwind", "Next.js"], ["Meta React Cert", "JS Expert"]),
72
+ ("Data_Science", ["Python", "PyTorch", "SQL", "Pandas"], ["TensorFlow Cert", "Data Pro"]),
73
+ ]
74
+
75
+ candidates = []
76
+ queries = [] # JDs
77
+
78
+ # We generate balanced pairs
79
+ for i in range(num_candidates):
80
+ domain_name, skills, certs = domains[i % len(domains)]
81
+ level = random.choice(["Junior", "Senior", "Lead"])
82
+
83
+ # 1. The Candidate Data
84
+ cand_id = f"cand_{i}_{domain_name}"
85
+ noisy_skills = [inject_real_world_noise(s, True) for s in skills]
86
+
87
+ candidates.append({
88
+ "id": cand_id,
89
+ "skills": noisy_skills,
90
+ "tech_skills": noisy_skills, # Project uses both
91
+ "experience": [f"Developed {domain_name} solutions at Tech {i}."],
92
+ "certifications": [certs[0]] if random.random() > 0.5 else [],
93
+ "full_text": f"{level} {domain_name}. Skills: {', '.join(noisy_skills)}"
94
+ })
95
+
96
+ # 2. The Matching Query (JD) - Formal Clean Version
97
+ jd_text = f"We are looking for a {level} {domain_name.replace('_', ' ')}. Must have expertise in {skills[0]}, {skills[1]}, and {skills[2]}."
98
+ queries.append({
99
+ "query": jd_text,
100
+ "relevant_id": cand_id,
101
+ "jd_structured": {
102
+ "skills": skills,
103
+ "tech_skills": skills,
104
+ "experience": [f"{level} {domain_name} experience."],
105
+ "certifications": certs
106
+ }
107
+ })
108
+
109
+ return candidates, queries
110
+
111
+ # ---------------------------------------------------------------------
112
+ # BENCHMARK RUNNER
113
+ # ---------------------------------------------------------------------
114
+
115
+ def run_benchmark():
116
+ device = "cuda" if torch.cuda.is_available() else "cpu"
117
+ print(f"🚀 Loading Models on {device}...", flush=True)
118
+
119
+ # Load Models
120
+ bert_model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
121
+ bge_model = SentenceTransformer('BAAI/bge-m3', device=device)
122
+
123
+ candidates, queries = generate_bench_dataset(250)
124
+
125
+ # Save the synthetic dataset to a JSON file for inspection
126
+ with open("synthetic_dataset_adversarial.json", "w", encoding="utf-8") as f:
127
+ json.dump({"candidates": candidates, "queries": queries}, f, indent=4)
128
+ print(f"💾 Saved generated synthetic dataset to 'synthetic_dataset_adversarial.json'", flush=True)
129
+
130
+ # Pre-calculate Candidate Embeddings
131
+ print("🧠 Indexing Candidates...")
132
+ start_idx = time.time()
133
+ for i, c in enumerate(candidates):
134
+ # BERT Flattened
135
+ c["bert_vec"] = bert_model.encode(c["full_text"])
136
+ # BGE Flattened
137
+ c["bge_flat_vec"] = bge_model.encode(c["full_text"])
138
+ # BGE Granular (Project Method)
139
+ c["bge_granular"] = {
140
+ "skills": bge_model.encode(" ".join(c["skills"])),
141
+ "tech_skills": bge_model.encode(" ".join(c["tech_skills"])),
142
+ "experience": bge_model.encode(" ".join(c["experience"])),
143
+ "certs": bge_model.encode(" ".join(c["certifications"])) if c["certifications"] else np.zeros(1024)
144
+ }
145
+ if (i+1) % 50 == 0:
146
+ print(f" -> Indexed {i+1}/{len(candidates)} candidates...", flush=True)
147
+ print(f"✅ Indexed in {time.time() - start_idx:.2f}s")
148
+
149
+ # Evaluation Loops
150
+ methods = ["Jaccard_Baseline", "BERT_Flattened", "BGE_Flattened", "BGE_Granular_Weighted"]
151
+ results = {m: {"mrr": 0, "r1": 0, "r3": 0} for m in methods}
152
+
153
+ weights = {"skills": 0.35, "tech_skills": 0.35, "experience": 0.20, "certs": 0.10}
154
+
155
+ print("\nEvaluating Queries...")
156
+ for i, q in enumerate(queries):
157
+ target_id = q["relevant_id"]
158
+ jd_text = q["query"]
159
+ jd_s = q["jd_structured"]
160
+
161
+ # Embed Query
162
+ q_bert = bert_model.encode(jd_text)
163
+ q_bge_flat = bge_model.encode(jd_text)
164
+ q_bge_g = {
165
+ "skills": bge_model.encode(" ".join(jd_s["skills"])),
166
+ "tech_skills": bge_model.encode(" ".join(jd_s["tech_skills"])),
167
+ "experience": bge_model.encode(" ".join(jd_s["experience"])),
168
+ "certs": bge_model.encode(" ".join(jd_s["certifications"]))
169
+ }
170
+
171
+ if (i+1) % 25 == 0:
172
+ print(f" -> Evaluated {i+1}/{len(queries)} queries...", flush=True)
173
+
174
+ # Calculate scores for all candidates
175
+ cand_scores = []
176
+ for c in candidates:
177
+ # 1. Jaccard
178
+ jac = jaccard_similarity(jd_s["skills"], c["skills"])
179
+ # 2. BERT
180
+ ber = cosine_similarity(q_bert, c["bert_vec"])
181
+ # 3. BGE Flat
182
+ bgf = cosine_similarity(q_bge_flat, c["bge_flat_vec"])
183
+ # 4. BGE Granular Weighted
184
+ bgg = (
185
+ cosine_similarity(q_bge_g["skills"], c["bge_granular"]["skills"]) * weights["skills"] +
186
+ cosine_similarity(q_bge_g["tech_skills"], c["bge_granular"]["tech_skills"]) * weights["tech_skills"] +
187
+ cosine_similarity(q_bge_g["experience"], c["bge_granular"]["experience"]) * weights["experience"] +
188
+ cosine_similarity(q_bge_g["certs"], c["bge_granular"]["certs"]) * weights["certs"]
189
+ )
190
+
191
+ cand_scores.append({
192
+ "id": c["id"],
193
+ "Jaccard_Baseline": jac,
194
+ "BERT_Flattened": ber,
195
+ "BGE_Flattened": bgf,
196
+ "BGE_Granular_Weighted": bgg
197
+ })
198
+
199
+ # Rank and Calc Metrics
200
+ for m in methods:
201
+ sorted_cands = sorted(cand_scores, key=lambda x: x[m], reverse=True)
202
+ rank = next(i for i, x in enumerate(sorted_cands) if x["id"] == target_id) + 1
203
+
204
+ results[m]["mrr"] += (1.0 / rank)
205
+ if rank == 1: results[m]["r1"] += 1
206
+ if rank <= 3: results[m]["r3"] += 1
207
+
208
+ # Print Results Table
209
+ num_q = len(queries)
210
+ print("\n" + "="*65)
211
+ print(f"{'Method':<25} | {'MRR':<8} | {'Recall@1':<10} | {'Recall@3':<10}")
212
+ print("-" * 65)
213
+
214
+ for m in methods:
215
+ mrr = results[m]["mrr"] / num_q
216
+ r1 = (results[m]["r1"] / num_q) * 100
217
+ r3 = (results[m]["r3"] / num_q) * 100
218
+ print(f"{m:<25} | {mrr:.4f} | {r1:>8.1f}% | {r3:>8.1f}%", flush=True)
219
+ print("="*65, flush=True)
220
+
221
+ # Save to file
222
+ summary = {m: {"mrr": results[m]["mrr"]/num_q, "r1": results[m]["r1"]/num_q, "r3": results[m]["r3"]/num_q} for m in methods}
223
+ with open("match_benchmark_results.json", "w") as f:
224
+ json.dump(summary, f, indent=4)
225
+ print(f"\n📄 Results saved to 'match_benchmark_results.json'", flush=True)
226
+
227
+ if __name__ == "__main__":
228
+ run_benchmark()
backend/src/embeddings/profile_entities_bench.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import time
4
+ import numpy as np
5
+ import json
6
+
7
+ # Add backend to path
8
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../..')))
9
+
10
+ from backend.src.embeddings.local_embedder import generate_embedding, generate_list_embedding
11
+
12
+ def generate_structured_profiles(num_samples=50):
13
+ """Generates synthetic resumes split into specific entity fields."""
14
+ print(f"Generating {num_samples} structured synthetic profiles...")
15
+
16
+ domains = [
17
+ ("Frontend", ["React", "JavaScript", "Tailwind", "CSS", "HTML", "Redux", "TypeScript", "Jest"]),
18
+ ("Backend", ["Python", "FastAPI", "PostgreSQL", "Docker", "AWS", "Linux", "Redis", "Kafka"]),
19
+ ("Data Science", ["Python", "Pandas", "PyTorch", "SQL", "Machine Learning", "NLP", "TensorFlow", "R"]),
20
+ ("DevOps", ["Kubernetes", "Docker", "Terraform", "CI/CD", "Jenkins", "AWS", "Bash", "Ansible"]),
21
+ ("Mobile", ["Swift", "Kotlin", "React Native", "Flutter", "iOS", "Android", "Firebase", "SQLite"])
22
+ ]
23
+ levels = ["Junior", "Mid-Level", "Senior", "Lead", "Principal"]
24
+
25
+ profiles = []
26
+ for i in range(num_samples):
27
+ domain_name, domain_skills = domains[i % len(domains)]
28
+ level = levels[i % len(levels)]
29
+
30
+ # Randomize skills count slightly per profile (5 to 8 skills)
31
+ np.random.seed(i)
32
+ skills_subset = list(np.random.choice(domain_skills, size=np.random.randint(5, 9), replace=False))
33
+
34
+ profile = {
35
+ "profile_id": f"cand_{i+1}_{domain_name.lower()}",
36
+ "headline": f"{level} {domain_name} Engineer",
37
+ "summary": f"Dedicated {level} {domain_name} professional with a proven track record of building scalable systems and working in agile environments. Passionate about clean code and modern architectures.",
38
+ "skills": skills_subset,
39
+ "experience": [
40
+ f"{level} Engineer at TechCorp: Spearheaded the migration to cloud infrastructure and improved system performance by 40%.",
41
+ f"Software Developer at Startup Inc: Developed RESTful APIs and collaborated with the frontend team to deliver features.",
42
+ f"Intern at Legacy Systems: Assisted in maintaining codebases and writing unit tests."
43
+ ]
44
+ }
45
+ profiles.append(profile)
46
+ return profiles
47
+
48
+ def profile_entities_scaled():
49
+ num_samples = 50
50
+ profiles = generate_structured_profiles(num_samples)
51
+
52
+ print(f"\n🚀 Starting Entity-to-Embedding Efficiency Benchmark (N={num_samples})...")
53
+
54
+ # Tracking arrays
55
+ summary_times = []
56
+ headline_times = []
57
+ skills_times = []
58
+ exp_times = []
59
+ total_times = []
60
+
61
+ for i, p in enumerate(profiles):
62
+ start_total = time.time()
63
+
64
+ # 1. Profile Headline
65
+ start = time.time()
66
+ generate_embedding(p["headline"])
67
+ headline_times.append((time.time() - start) * 1000)
68
+
69
+ # 2. Profile Summary
70
+ start = time.time()
71
+ generate_embedding(p["summary"])
72
+ summary_times.append((time.time() - start) * 1000)
73
+
74
+ # 3. Profile Skills (Batch)
75
+ start = time.time()
76
+ generate_list_embedding(p["skills"])
77
+ skills_times.append((time.time() - start) * 1000)
78
+
79
+ # 4. Profile Experience (Batch)
80
+ start = time.time()
81
+ generate_list_embedding(p["experience"])
82
+ exp_times.append((time.time() - start) * 1000)
83
+
84
+ # Total
85
+ total_times.append((time.time() - start_total) * 1000)
86
+
87
+ if (i + 1) % 10 == 0:
88
+ print(f" -> Processed {i + 1}/{num_samples} profiles...")
89
+
90
+ # Calculate statistics
91
+ results = [
92
+ "IRIS Entity-to-Embedding Efficiency Results (Scaled)",
93
+ f"Total Profiles Evaluated: {num_samples}",
94
+ "-" * 60,
95
+ f"{'Entity Type':<15} | {'Mean Latency (ms)':<20} | {'Std Dev (ms)':<15}",
96
+ "-" * 60,
97
+ f"{'Headline':<15} | {np.mean(headline_times):<20.2f} | {np.std(headline_times):<15.2f}",
98
+ f"{'Summary':<15} | {np.mean(summary_times):<20.2f} | {np.std(summary_times):<15.2f}",
99
+ f"{'Skills (List)':<15} | {np.mean(skills_times):<20.2f} | {np.std(skills_times):<15.2f}",
100
+ f"{'Experience (List)':<15}| {np.mean(exp_times):<20.2f} | {np.std(exp_times):<15.2f}",
101
+ "-" * 60,
102
+ f"MEAN TOTAL PER PROFILE: {np.mean(total_times):.2f} ms",
103
+ f"Average Throughput: {1000 / np.mean(total_times):.3f} profiles/sec"
104
+ ]
105
+
106
+ output_text = "\n".join(results)
107
+ print("\n" + output_text)
108
+
109
+ with open("entity_benchmark_scaled_results.txt", "w") as f:
110
+ f.write(output_text)
111
+
112
+ print("\n📄 Results saved to 'entity_benchmark_scaled_results.txt'.")
113
+
114
+ if __name__ == "__main__":
115
+ profile_entities_scaled()
backend/src/matching/similarity.py CHANGED
@@ -3,13 +3,25 @@ import numpy as np
3
  from typing import Dict, Any, List
4
  from supabase import Client
5
 
6
- def cosine_similarity(v1: List[float], v2: List[float]) -> float:
7
- """Calculates cosine similarity between two vectors."""
8
- if not v1 or not v2 or len(v1) != len(v2):
 
 
 
 
 
 
 
 
 
 
 
 
9
  return 0.0
10
 
11
- a = np.array(v1)
12
- b = np.array(v2)
13
 
14
  # Check if vectors are zero vectors
15
  if np.all(a == 0) or np.all(b == 0):
@@ -51,27 +63,37 @@ async def calculate_granular_match_score(client: Client, candidate_id: str, job_
51
  print(f"❌ Database error in match score: {e}")
52
  return {"total_score": 0, "breakdown": {}, "error": str(e)}
53
 
54
- # 2. Define Weights
55
- # These could eventually be user-defined
56
  WEIGHTS = {
57
- "skills": 0.35,
58
  "technical_skills": 0.35,
59
  "experience": 0.20,
 
 
 
60
  "certifications": 0.10
61
  }
62
 
63
  # 3. Calculate Individual Similarities
64
  scores = {}
65
 
66
- # Skill matching
67
- scores["skills"] = cosine_similarity(profile_emb.get("skills"), job_emb.get("skills"))
68
  scores["technical_skills"] = cosine_similarity(profile_emb.get("technical_skills"), job_emb.get("technical_skills"))
69
 
70
- # Experience matching
71
- scores["experience"] = cosine_similarity(profile_emb.get("experience"), job_emb.get("experience"))
 
 
 
 
 
 
 
 
 
72
 
73
- # Certifications matching
74
- scores["certifications"] = cosine_similarity(profile_emb.get("certifications"), job_emb.get("certifications"))
 
75
 
76
  # 4. Calculate Weighted Total
77
  total_score = 0
@@ -79,12 +101,13 @@ async def calculate_granular_match_score(client: Client, candidate_id: str, job_
79
 
80
  for key, weight in WEIGHTS.items():
81
  if scores.get(key) is not None:
82
- total_score += scores[key] * weight
 
83
  available_weight += weight
84
 
85
- # Normalize if some fields were missing (though WEIGHTS sums to 1.0)
86
  if available_weight > 0:
87
- final_score = (total_score / available_weight) * 100
88
  else:
89
  final_score = 0
90
 
 
3
  from typing import Dict, Any, List
4
  from supabase import Client
5
 
6
+ def cosine_similarity(v1: Any, v2: Any) -> float:
7
+ """Calculates cosine similarity between two vectors, handling both lists and pgvector strings."""
8
+ def parse_vector(v):
9
+ if isinstance(v, str):
10
+ try:
11
+ # Remove brackets and split by comma
12
+ return [float(x.strip()) for x in v.strip('[]').split(',') if x.strip()]
13
+ except Exception:
14
+ return []
15
+ return v if isinstance(v, list) else []
16
+
17
+ vec1 = parse_vector(v1)
18
+ vec2 = parse_vector(v2)
19
+
20
+ if not vec1 or not vec2 or len(vec1) != len(vec2):
21
  return 0.0
22
 
23
+ a = np.array(vec1)
24
+ b = np.array(vec2)
25
 
26
  # Check if vectors are zero vectors
27
  if np.all(a == 0) or np.all(b == 0):
 
63
  print(f"❌ Database error in match score: {e}")
64
  return {"total_score": 0, "breakdown": {}, "error": str(e)}
65
 
66
+ # 2. Define Weights (Matching SQL function public.match_profile_job)
 
67
  WEIGHTS = {
 
68
  "technical_skills": 0.35,
69
  "experience": 0.20,
70
+ "projects": 0.15,
71
+ "skills": 0.10,
72
+ "education": 0.10,
73
  "certifications": 0.10
74
  }
75
 
76
  # 3. Calculate Individual Similarities
77
  scores = {}
78
 
79
+ # Technical Skills
 
80
  scores["technical_skills"] = cosine_similarity(profile_emb.get("technical_skills"), job_emb.get("technical_skills"))
81
 
82
+ # Experience
83
+ scores["experience"] = cosine_similarity(profile_emb.get("experience"), job_emb.get("work_experience"))
84
+
85
+ # Projects (Compare profile projects vs job technical skills)
86
+ scores["projects"] = cosine_similarity(profile_emb.get("projects"), job_emb.get("technical_skills"))
87
+
88
+ # Skills
89
+ scores["skills"] = cosine_similarity(profile_emb.get("skills"), job_emb.get("skills"))
90
+
91
+ # Education
92
+ scores["education"] = cosine_similarity(profile_emb.get("education"), job_emb.get("education"))
93
 
94
+ # Certifications (Compare profile certs vs job technical skills or skills)
95
+ job_target = job_emb.get("technical_skills") if job_emb.get("technical_skills") else job_emb.get("skills")
96
+ scores["certifications"] = cosine_similarity(profile_emb.get("certifications"), job_target)
97
 
98
  # 4. Calculate Weighted Total
99
  total_score = 0
 
101
 
102
  for key, weight in WEIGHTS.items():
103
  if scores.get(key) is not None:
104
+ # Scale to 100 like SQL
105
+ total_score += (scores[key] * 100) * weight
106
  available_weight += weight
107
 
108
+ # Normalize
109
  if available_weight > 0:
110
+ final_score = total_score / available_weight
111
  else:
112
  final_score = 0
113
 
backend/src/services/clustering_service.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import json
import os
import time
from collections import defaultdict
from typing import Any, Dict, List

import numpy as np
from sklearn.cluster import KMeans
from google import genai
import google.genai.types as types
from supabase import create_client, Client
from dotenv import load_dotenv

# Load environment variables
load_dotenv()


class ClusteringService:
    """Groups candidate profiles into 'talent pools'.

    Pipeline: fetch per-profile embeddings from Supabase, K-Means them into
    groups, ask Gemini for a short job-title label per group, then write the
    labels back to the `profiles` table.
    """

    def __init__(self):
        url = os.environ.get("SUPABASE_URL")
        # Prefer the service-role key (needed to update arbitrary rows);
        # fall back to the anon key for read-mostly local runs.
        key = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
        if not url or not key:
            # Fail fast with a clear message instead of an opaque client error.
            raise RuntimeError("SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY (or SUPABASE_KEY) must be set.")
        self.client: Client = create_client(url, key)
        self.gemini_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

    def fetch_all_embeddings(self) -> List[Dict[str, Any]]:
        """Fetch IDs and a representative embedding for all profiles.

        `technical_skills` is used as the single clustering signal for now;
        other entity embeddings (headline, summary, ...) could be concatenated
        later for a richer representation.
        """
        print("🔍 Fetching profile embeddings...")
        resp = self.client.table("profile_embeddings").select("id, technical_skills").execute()
        return resp.data

    def perform_clustering(self, data: List[Dict[str, Any]], n_clusters: int = 5):
        """Run K-Means over the fetched embeddings.

        Rows with a missing or unparseable embedding are skipped with a
        warning. Returns a list of {"id": ..., "cluster": int} dicts, or an
        empty list when there is nothing usable to cluster.
        """
        if not data:
            print("⚠️ No data to cluster.")
            return []

        vectors: List[Any] = []
        ids: List[Any] = []
        for item in data:
            raw_vec = item.get("technical_skills")
            if not raw_vec:
                continue
            try:
                # Some versions of postgrest return pgvector columns as
                # strings like '[0.1, 0.2]'; parse those, pass lists through.
                vec = json.loads(raw_vec) if isinstance(raw_vec, str) else raw_vec
                vectors.append(vec)
                ids.append(item["id"])
            except Exception as e:
                print(f"⚠️ Failed to parse embedding for {item['id']}: {e}")

        if not vectors:
            # Every row lacked a usable embedding; KMeans on zero samples
            # would raise, so bail out cleanly instead.
            print("⚠️ No data to cluster.")
            return []

        # Never request more clusters than there are samples.
        n_clusters = min(n_clusters, len(vectors))

        print(f"🤖 Performing K-Means clustering (K={n_clusters})...")
        kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
        labels = kmeans.fit_predict(np.asarray(vectors, dtype=float))

        return [{"id": pid, "cluster": int(label)} for pid, label in zip(ids, labels)]

    def generate_labels_for_clusters(self, clustered_data: List[Dict[str, Any]]) -> Dict[int, str]:
        """Generate a human-readable job-title label per cluster using Gemini.

        Up to 5 member profiles per cluster are sampled to describe the group.
        Falls back to "Unknown Group" when the model call keeps failing.
        """
        cluster_groups: Dict[int, List[Any]] = defaultdict(list)
        for item in clustered_data:
            cluster_groups[item["cluster"]].append(item["id"])

        labels: Dict[int, str] = {}
        for cluster_id, user_ids in cluster_groups.items():
            # Fetch sample details for these users to describe the cluster
            sample_ids = user_ids[:5]
            profiles_resp = self.client.table("profiles").select("headline, technical_skills").in_("id", sample_ids).execute()

            sample_text = "\n".join([
                f"- {p.get('headline')} (Skills: {p.get('technical_skills')})"
                for p in profiles_resp.data
            ])

            prompt = f"""
            You are an expert HR Talent Acquisition Specialist.
            Analyze the following representative professional profiles from a talent pool and provide a perfect, professional job title that best encapsulates the entire group.

            CRITERIA:
            - Concise: Exactly 2-4 words.
            - Professional: Use industry-standard terminology (e.g., "Full Stack Engineer", "DevOps Architect").
            - Accurate: Reflect the common denominator in seniority and technical domain.
            - Formatting: Return ONLY the title string, no quotes, no extra text.

            REPRESENTATIVE PROFILES:
            {sample_text}

            PERFECT JOB TITLE:
            """

            max_retries = 3
            label = "Unknown Group"

            # Retry with exponential backoff (2s, 4s) to ride out transient API errors.
            for attempt in range(max_retries):
                try:
                    response = self.gemini_client.models.generate_content(
                        model="gemini-2.5-flash-lite",
                        contents=prompt,
                        config=types.GenerateContentConfig(temperature=0)  # deterministic labels
                    )
                    label = response.text.strip().replace('"', '')
                    break
                except Exception as e:
                    if attempt < max_retries - 1:
                        wait = 2 ** (attempt + 1)
                        print(f"⚠️ Labeling failed for Cluster {cluster_id}. Retrying in {wait}s... ({e})")
                        time.sleep(wait)
                    else:
                        print(f"❌ Labeling failed for Cluster {cluster_id} after {max_retries} attempts.")

            labels[cluster_id] = label
            print(f"✅ Cluster {cluster_id} Label: {label}")
            time.sleep(1)  # Small pause between clusters to be gentle on the API

        return labels

    def update_database_with_labels(self, clustered_data: List[Dict[str, Any]], cluster_labels: Dict[int, str]):
        """Write each profile's cluster label back to the `profiles` table."""
        print("💾 Updating database with cluster labels...")
        # NOTE(review): one UPDATE per profile (N+1 round trips). Acceptable
        # for small talent pools; switch to a bulk upsert if this grows.
        for item in clustered_data:
            label = cluster_labels[item["cluster"]]
            self.client.table("profiles").update({"cluster_label": label}).eq("id", item["id"]).execute()
        print("✨ Database successfully updated.")

    def run_clustering_pipeline(self, n_clusters: int = 5):
        """Orchestrate the full pipeline: fetch -> cluster -> label -> persist."""
        data = self.fetch_all_embeddings()
        clustered_results = self.perform_clustering(data, n_clusters)
        if not clustered_results:
            return

        labels = self.generate_labels_for_clusters(clustered_results)
        self.update_database_with_labels(clustered_results, labels)


if __name__ == "__main__":
    service = ClusteringService()
    service.run_clustering_pipeline(n_clusters=5)
backend/src/services/test_clustering.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import sys
import os

# Make the backend package root importable when this file is run directly.
_this_dir = os.path.dirname(__file__)
_backend_root = os.path.abspath(os.path.join(_this_dir, '../../'))
sys.path.append(_backend_root)

from src.services.clustering_service import ClusteringService


def test_clustering_pipeline():
    """Smoke-test the end-to-end clustering pipeline against live services."""
    print("🚀 Starting Clustering Pipeline Test...")
    service = ClusteringService()

    try:
        # Run clustering with 5 clusters for more granular grouping
        service.run_clustering_pipeline(n_clusters=5)
        print("✅ Pipeline test completed successfully.")
    except Exception as e:
        print(f"❌ Pipeline test failed: {e}")


if __name__ == "__main__":
    test_clustering_pipeline()
backend/src/services/verify_labels.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import sys
import os
from collections import Counter

from supabase import create_client, Client
from dotenv import load_dotenv

# Add backend/src to path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))

# Load environment variables
load_dotenv()


def verify_labels():
    """Print a sample of labeled profiles plus per-label candidate counts.

    Intended as a manual sanity check after a clustering run: shows the first
    15 profiles (name, original headline, assigned cluster label) and then the
    distinct labels with how many candidates each pool contains.
    """
    print("🔍 Fetching generated cluster labels from database...")
    url = os.environ.get("SUPABASE_URL")
    key = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
    client: Client = create_client(url, key)

    resp = client.table("profiles").select("full_name, headline, cluster_label").not_.is_("cluster_label", "null").order("cluster_label").execute()

    if not resp or not hasattr(resp, 'data') or resp.data is None:
        print("⚠️ No cluster labels found or database error.")
        return

    print(f"\n{'Name':<25} | {'Original Headline':<35} | {'Cluster Label'}")
    print("-" * 85)
    for p in resp.data[:15]:  # Show first 15
        name = (p.get('full_name') or "Unknown")[:25]
        headline = (p.get('headline') or "N/A")[:35]
        label = p.get('cluster_label') or "Unknown"
        print(f"{name:<25} | {headline:<35} | {label}")

    # Count candidates per label in a single pass (Counter) instead of the
    # O(n^2) list.count-in-a-loop pattern; output order stays sorted by label.
    label_counts = Counter(p.get('cluster_label') for p in resp.data if p.get('cluster_label'))
    print("\n📦 Distinct Talent Pools (Clusters):")
    for idx, label in enumerate(sorted(label_counts), 1):
        print(f"{idx}. {label} ({label_counts[label]} candidates)")


if __name__ == "__main__":
    verify_labels()
backend/supabase_ingest.py CHANGED
@@ -57,7 +57,7 @@ if SUPABASE_URL and SUPABASE_KEY:
57
  else:
58
  print("⚠️ Warning: Supabase Credentials not found in environment. Only library functions will fail if called without a client.")
59
 
60
- ALLOWED_EXTENSIONS = {".pdf", ".docx"}
61
 
62
  # ---------------------------------------------------------------------
63
  # UTILS
@@ -212,15 +212,15 @@ def upsert_profile(client, payload: Dict[str, Any]):
212
  # UNIFIED PROCESSING FUNCTION (Called by API and Main)
213
  # ---------------------------------------------------------------------
214
 
215
- def process_resume(client, user_id: str, file_path: str, temp_dir: str = "data/resumes/raw") -> Dict[str, Any]:
216
  """
217
  Downloads, extracts, and upserts a resume.
218
  Used by both the API (real-time) and the main script (batch).
219
  """
220
  try:
221
  # 1. Download
222
- print(f"⬇️ Downloading {file_path}...")
223
- local_path = download_object(client, "resume", file_path, temp_dir)
224
 
225
  # 2. Extract
226
  print("🧠 Sending to Gemini...")
@@ -312,6 +312,11 @@ def main():
312
  except Exception as e:
313
  print(f" ⚠️ Embedding generation failed (non-critical): {e}")
314
 
 
 
 
 
 
315
  except Exception as e:
316
  print(f" ❌ Pipeline failed for this file: {e}")
317
 
 
57
  else:
58
  print("⚠️ Warning: Supabase Credentials not found in environment. Only library functions will fail if called without a client.")
59
 
60
+ ALLOWED_EXTENSIONS = {".pdf", ".docx", ".doc"}
61
 
62
  # ---------------------------------------------------------------------
63
  # UTILS
 
212
  # UNIFIED PROCESSING FUNCTION (Called by API and Main)
213
  # ---------------------------------------------------------------------
214
 
215
+ def process_resume(client, user_id: str, file_path: str, bucket: str = "resume", temp_dir: str = "data/resumes/raw") -> Dict[str, Any]:
216
  """
217
  Downloads, extracts, and upserts a resume.
218
  Used by both the API (real-time) and the main script (batch).
219
  """
220
  try:
221
  # 1. Download
222
+ print(f"⬇️ Downloading {file_path} from bucket '{bucket}'...")
223
+ local_path = download_object(client, bucket, file_path, temp_dir)
224
 
225
  # 2. Extract
226
  print("🧠 Sending to Gemini...")
 
312
  except Exception as e:
313
  print(f" ⚠️ Embedding generation failed (non-critical): {e}")
314
 
315
+ # 8. Cleanup
316
+ if os.path.exists(local_path):
317
+ os.remove(local_path)
318
+ print(" 🗑️ Cleaned up temporary file.")
319
+
320
  except Exception as e:
321
  print(f" ❌ Pipeline failed for this file: {e}")
322
 
backend/test_ingest_output.txt ADDED
File without changes
debug_log.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Testing 896d6c15-2d98-4435-9869-0f11e4db48bd against 45bcca29-4e12-45bf-97d4-0b77ff55472f
2
+
3
+ --- Profile Lengths ---
4
+ skills: 1024
5
+ technical_skills: 1024
6
+ experience: 1024
7
+ certifications: 1024
8
+
9
+ --- Job Lengths ---
10
+ skills: 1024
11
+ technical_skills: 1024
12
+ work_experience: 1024
13
+ certifications: None
14
+
15
+ Result: {"total_score": 80.5, "breakdown": {"technical_skills": 95.8, "experience": 62.7, "projects": 93.3, "skills": 75.5, "education": 59.6, "certifications": 69.1}, "weights": {"technical_skills": 0.35, "experience": 0.2, "projects": 0.15, "skills": 0.1, "education": 0.1, "certifications": 0.1}}
entity_benchmark_scaled_results.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ IRIS Entity-to-Embedding Efficiency Results (Scaled)
2
+ Total Profiles Evaluated: 50
3
+ ------------------------------------------------------------
4
+ Entity Type | Mean Latency (ms) | Std Dev (ms)
5
+ ------------------------------------------------------------
6
+ Headline | 965.78 | 2969.16
7
+ Summary | 785.70 | 141.60
8
+ Skills (List) | 780.01 | 160.76
9
+ Experience (List)| 1005.30 | 185.11
10
+ ------------------------------------------------------------
11
+ MEAN TOTAL PER PROFILE: 3536.80 ms
12
+ Average Throughput: 0.283 profiles/sec
experimental_results.tex ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ \section{Experimental Results}
2
+ \label{sec:experimental_results}
3
+
4
+ In this section, we present the empirical evaluation of the IRIS system, focusing on two key dimensions: computational efficiency (latency and throughput) and retrieval accuracy.
5
+
6
+ \subsection{Computational Efficiency}
7
+ The efficiency of the entity extraction and embedding pipeline was evaluated using a dataset of 50 candidate profiles. The pipeline consists of extracting specific entities—Headline, Summary, Skills, and Experience—and generating their corresponding embeddings using the BGE-M3 model.
8
+
9
+ Table~\ref{tab:latency_results} summarizes the mean latency and standard deviation for each entity type.
10
+
11
+ \begin{table}[h]
12
+ \centering
13
+ \caption{Mean Latency and Standard Deviation per Entity Extraction (N=50)}
14
+ \label{tab:latency_results}
15
+ \begin{tabular}{lrr}
16
+ \hline
17
+ \textbf{Entity Type} & \textbf{Mean Latency (ms)} & \textbf{Std. Dev. (ms)} \\ \hline
18
+ Headline & 965.78 & 2969.16 \\
19
+ Summary & 785.70 & 141.60 \\
20
+ Skills (List) & 780.01 & 160.76 \\
21
+ Experience (List) & 1005.30 & 185.11 \\ \hline
22
+ \textbf{Total per Profile} & \textbf{3536.80} & -- \\ \hline
23
+ \end{tabular}
24
+ \end{table}
25
+
26
+ The average total processing time per profile is approximately 3.54 seconds, resulting in a throughput of \textbf{0.283 profiles per second}. While the Headline extraction shows high variance, possibly due to network latency or cold-start issues in the embedding service, the overall pipeline maintains a consistent performance suitable for near-real-time recruitment tasks.
27
+
28
+ \subsection{Retrieval Performance}
29
+ We compared the proposed IRIS matching methods against standard baselines using Mean Reciprocal Rank (MRR) and Recall@K ($R@k$). The evaluation included:
30
+ \begin{itemize}
31
+ \item \textbf{Jaccard Baseline}: A keyword-based overlap method.
32
+ \item \textbf{BERT Flattened}: Dense retrieval using BERT embeddings on concatenated profile text.
33
+ \item \textbf{BGE Flattened}: Dense retrieval using BGE-M3 embeddings on concatenated profile text.
34
+ \item \textbf{BGE Granular Weighted}: Our proposed method using weighted cosine similarity across specific entities.
35
+ \end{itemize}
36
+
37
+ Table~\ref{tab:retrieval_results} presents the results of this comparison.
38
+
39
+ \begin{table}[h]
40
+ \centering
41
+ \caption{Comparison of Retrieval Accuracy Metrics}
42
+ \label{tab:retrieval_results}
43
+ \begin{tabular}{lccc}
44
+ \hline
45
+ \textbf{Method} & \textbf{MRR} & \textbf{R@1} & \textbf{R@3} \\ \hline
46
+ Jaccard Baseline & 0.0755 & 0.016 & 0.048 \\
47
+ BERT Flattened & 0.1689 & 0.048 & \textbf{0.144} \\
48
+ BGE Flattened & \textbf{0.1726} & \textbf{0.048} & \textbf{0.144} \\
49
+ BGE Granular Weighted & 0.0730 & 0.012 & 0.044 \\ \hline
50
+ \end{tabular}
51
+ \end{table}
52
+ 
53
+ The results indicate that the \textbf{BGE Flattened} approach achieves the highest MRR (0.1726) and the best Recall@1/Recall@3. Notably, the granular weighted approach currently underperforms the flattened-embedding methods, suggesting that the aggregation logic or the weight distribution across individual entities requires further optimization.
match_benchmark_results.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Jaccard_Baseline": {
3
+ "mrr": 0.07552527033230824,
4
+ "r1": 0.016,
5
+ "r3": 0.048
6
+ },
7
+ "BERT_Flattened": {
8
+ "mrr": 0.1688751043476369,
9
+ "r1": 0.048,
10
+ "r3": 0.144
11
+ },
12
+ "BGE_Flattened": {
13
+ "mrr": 0.17255959067443694,
14
+ "r1": 0.048,
15
+ "r3": 0.144
16
+ },
17
+ "BGE_Granular_Weighted": {
18
+ "mrr": 0.07297651022436405,
19
+ "r1": 0.012,
20
+ "r3": 0.044
21
+ }
22
+ }
matching_analysis_report.md ADDED
File without changes
quality_metrics_adversarial.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "N=200 Adversarial (Noise + Distractors)",
3
+ "mrr": 0.70625,
4
+ "recall_1": 0.525,
5
+ "recall_3": 0.775
6
+ }
schema_dump.txt ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --- Profile Embeddings ---
2
+ - certifications
3
+ - created_at
4
+ - education
5
+ - experience
6
+ - headline
7
+ - id
8
+ - projects
9
+ - skills
10
+ - summary
11
+ - technical_skills
12
+ - updated_at
13
+
14
+ --- Job Embeddings ---
15
+ - created_at
16
+ - education
17
+ - job_id
18
+ - skills
19
+ - technical_skills
20
+ - tools
21
+ - updated_at
22
+ - work_experience
src/components/Admin/AdminLayout.jsx CHANGED
@@ -1,23 +1,24 @@
1
  import React from 'react';
2
  import { motion } from 'framer-motion';
3
- import { supabase } from '../../supabaseClient';
4
 
5
  // --- Icons ---
6
- const HomeIcon = () => ( <svg style={{ width: '24px', height: '24px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"><path d="M3 9l9-7 9 7v11a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2z"></path><polyline points="9 22 9 12 15 12 15 22"></polyline></svg> );
7
- const BriefcaseIcon = () => ( <svg style={{ width: '24px', height: '24px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"><rect x="2" y="7" width="20" height="14" rx="2" ry="2"></rect><path d="M16 21V5a2 2 0 0 0-2-2h-4a2 2 0 0 0-2 2v16"></path></svg> );
8
- const MessageSquareIcon = () => ( <svg style={{ width: '24px', height: '24px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"><path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"></path></svg> );
9
  // ✅ UPDATED: Complete, robust Settings Icon (Gear)
10
- const SettingsIcon = () => (
11
  <svg style={{ width: '24px', height: '24px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
12
  <path d="M12.22 2h-.44a2 2 0 0 0-2 2v.18a2 2 0 0 1-1 1.73l-.43.25a2 2 0 0 1-2 0l-.15-.08a2 2 0 0 0-2.73.73l-.22.38a2 2 0 0 0 .73 2.73l.15.1a2 2 0 0 1 1 1.72v.51a2 2 0 0 1-1 1.74l-.15.09a2 2 0 0 0-.73 2.73l.22.38a2 2 0 0 0 2.73.73l.15-.08a2 2 0 0 1 2 0l.43.25a2 2 0 0 1 1 1.73V20a2 2 0 0 0 2 2h.44a2 2 0 0 0 2-2v-.18a2 2 0 0 1 1-1.73l.43-.25a2 2 0 0 1 2 0l.15.08a2 2 0 0 0 2.73-.73l.22-.38a2 2 0 0 0-.73-2.73l-.15-.1a2 2 0 0 1-1-1.72v-.51a2 2 0 0 1 1-1.74l.15-.09a2 2 0 0 0 .73-2.73l-.22-.38a2 2 0 0 0-2.73-.73l-.15.08a2 2 0 0 1-2 0l-.43-.25a2 2 0 0 1-1-1.73V4a2 2 0 0 0-2-2z"></path>
13
  <circle cx="12" cy="12" r="3"></circle>
14
- </svg>
15
  );
16
- const BriefcasePlusIcon = () => ( <svg style={{ width: '24px', height: '24px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"><rect x="2" y="7" width="20" height="14" rx="2" ry="2"></rect><path d="M16 21V5a2 2 0 0 0-2-2h-4a2 2 0 0 0-2 2v16"></path><line x1="12" y1="11" x2="12" y2="17"></line><line x1="9" y1="14" x2="15" y2="14"></line></svg>);
17
- const LogoutIcon = () => ( <svg style={{ width: '20px', height: '20px', marginRight: '8px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"><path d="M9 21H5a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2h4"></path><polyline points="16 17 21 12 16 7"></polyline><line x1="21" y1="12" x2="9" y2="12"></line></svg> );
 
18
 
19
  export default function AdminLayout({ children, activeTab, setActiveTab, onNavigate }) {
20
-
21
  // Global Logout Handler
22
  const handleLogout = async () => {
23
  const { error } = await supabase.auth.signOut();
@@ -27,7 +28,7 @@ export default function AdminLayout({ children, activeTab, setActiveTab, onNavig
27
 
28
  return (
29
  <div style={{ height: '100vh', width: '100%', backgroundColor: '#020617', color: 'white', fontFamily: "'Montserrat', sans-serif", display: 'flex', position: 'relative', overflow: 'hidden' }}>
30
-
31
  {/* Background Effects */}
32
  <div style={{ position: 'fixed', top: 0, left: 0, right: 0, bottom: 0, zIndex: 0 }}>
33
  <div style={{ position: 'absolute', borderRadius: '50%', filter: 'blur(80px)', opacity: 0.3, width: '400px', height: '400px', backgroundColor: '#EF4444', top: '-50px', left: '-100px' }}></div>
@@ -37,14 +38,15 @@ export default function AdminLayout({ children, activeTab, setActiveTab, onNavig
37
  {/* Sidebar */}
38
  <aside style={{ width: '100px', padding: '2rem 0', display: 'flex', flexDirection: 'column', alignItems: 'center', zIndex: 10 }}>
39
  <div style={{ fontSize: '1.5rem', fontWeight: 'bold', color: '#EF4444', marginBottom: '2rem' }}>IRIS</div>
40
- <nav style={{
41
- display: 'flex', flexDirection: 'column', alignItems: 'center', gap: '1.5rem',
42
- backgroundColor: 'rgba(239, 68, 68, 0.05)', border: '1px solid rgba(239, 68, 68, 0.2)',
43
- borderRadius: '9999px', padding: '2rem 1rem'
44
  }}>
45
  <NavButton active={activeTab === 'dashboard'} onClick={() => setActiveTab('dashboard')} icon={<HomeIcon />} />
46
  <NavButton active={activeTab === 'job-management'} onClick={() => setActiveTab('job-management')} icon={<BriefcasePlusIcon />} />
47
  <NavButton active={activeTab === 'jobs'} onClick={() => setActiveTab('jobs')} icon={<BriefcaseIcon />} />
 
48
  <NavButton active={activeTab === 'messages'} onClick={() => setActiveTab('messages')} icon={<MessageSquareIcon />} />
49
  <NavButton active={activeTab === 'settings'} onClick={() => setActiveTab('settings')} icon={<SettingsIcon />} />
50
  </nav>
@@ -52,26 +54,26 @@ export default function AdminLayout({ children, activeTab, setActiveTab, onNavig
52
 
53
  {/* Main Content Area */}
54
  <div style={{ flex: 1, padding: '2rem', overflowY: 'auto', height: '100vh', boxSizing: 'border-box', position: 'relative', zIndex: 1 }}>
55
-
56
  {/* ✅ GLOBAL LOGOUT BUTTON - Updated Styles for Alignment */}
57
  <div style={{ position: 'absolute', top: '2rem', right: '2rem', zIndex: 50 }}>
58
- <motion.button
59
- onClick={handleLogout}
60
- whileHover={{ scale: 1.05 }}
61
- whileTap={{ scale: 0.95 }}
62
- style={{
63
- backgroundColor: '#EF4444',
64
- color: 'white',
65
- display: 'flex',
66
- alignItems: 'center',
67
  justifyContent: 'center',
68
- padding: '0.75rem 1.5rem',
69
- borderRadius: '0.5rem',
70
- fontWeight: 'bold',
71
- cursor: 'pointer',
72
- border: 'none',
73
  // Matches the visual weight of "Post New Job"
74
- minWidth: '160px'
75
  }}
76
  >
77
  <LogoutIcon /> Logout
@@ -86,10 +88,10 @@ export default function AdminLayout({ children, activeTab, setActiveTab, onNavig
86
 
87
  // Helper Component for Navigation Buttons
88
  const NavButton = ({ active, onClick, icon }) => (
89
- <motion.button
90
- whileHover={{ scale: 1.1 }}
91
- whileTap={{ scale: 0.9 }}
92
- onClick={onClick}
93
  style={{ background: 'none', border: 'none', color: active ? '#EF4444' : '#d1d5db', cursor: 'pointer' }}
94
  >
95
  {icon}
 
1
  import React from 'react';
2
  import { motion } from 'framer-motion';
3
+ import { supabase } from '../../supabaseClient';
4
 
5
  // --- Icons ---
6
+ const HomeIcon = () => (<svg style={{ width: '24px', height: '24px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"><path d="M3 9l9-7 9 7v11a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2z"></path><polyline points="9 22 9 12 15 12 15 22"></polyline></svg>);
7
+ const BriefcaseIcon = () => (<svg style={{ width: '24px', height: '24px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"><rect x="2" y="7" width="20" height="14" rx="2" ry="2"></rect><path d="M16 21V5a2 2 0 0 0-2-2h-4a2 2 0 0 0-2 2v16"></path></svg>);
8
+ const MessageSquareIcon = () => (<svg style={{ width: '24px', height: '24px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"><path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"></path></svg>);
9
  // ✅ UPDATED: Complete, robust Settings Icon (Gear)
10
+ const SettingsIcon = () => (
11
  <svg style={{ width: '24px', height: '24px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
12
  <path d="M12.22 2h-.44a2 2 0 0 0-2 2v.18a2 2 0 0 1-1 1.73l-.43.25a2 2 0 0 1-2 0l-.15-.08a2 2 0 0 0-2.73.73l-.22.38a2 2 0 0 0 .73 2.73l.15.1a2 2 0 0 1 1 1.72v.51a2 2 0 0 1-1 1.74l-.15.09a2 2 0 0 0-.73 2.73l.22.38a2 2 0 0 0 2.73.73l.15-.08a2 2 0 0 1 2 0l.43.25a2 2 0 0 1 1 1.73V20a2 2 0 0 0 2 2h.44a2 2 0 0 0 2-2v-.18a2 2 0 0 1 1-1.73l.43-.25a2 2 0 0 1 2 0l.15.08a2 2 0 0 0 2.73-.73l.22-.38a2 2 0 0 0-.73-2.73l-.15-.1a2 2 0 0 1-1-1.72v-.51a2 2 0 0 1 1-1.74l.15-.09a2 2 0 0 0 .73-2.73l-.22-.38a2 2 0 0 0-2.73-.73l-.15.08a2 2 0 0 1-2 0l-.43-.25a2 2 0 0 1-1-1.73V4a2 2 0 0 0-2-2z"></path>
13
  <circle cx="12" cy="12" r="3"></circle>
14
+ </svg>
15
  );
16
+ const BriefcasePlusIcon = () => (<svg style={{ width: '24px', height: '24px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"><rect x="2" y="7" width="20" height="14" rx="2" ry="2"></rect><path d="M16 21V5a2 2 0 0 0-2-2h-4a2 2 0 0 0-2 2v16"></path><line x1="12" y1="11" x2="12" y2="17"></line><line x1="9" y1="14" x2="15" y2="14"></line></svg>);
17
+ const ClustersIcon = () => (<svg style={{ width: '24px', height: '24px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"><circle cx="12" cy="12" r="3" /><circle cx="4" cy="6" r="2" /><circle cx="20" cy="6" r="2" /><circle cx="4" cy="18" r="2" /><circle cx="20" cy="18" r="2" /><line x1="12" y1="9" x2="5" y2="7" /><line x1="12" y1="9" x2="19" y2="7" /><line x1="12" y1="15" x2="5" y2="17" /><line x1="12" y1="15" x2="19" y2="17" /></svg>);
18
+ const LogoutIcon = () => (<svg style={{ width: '20px', height: '20px', marginRight: '8px' }} viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"><path d="M9 21H5a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2h4"></path><polyline points="16 17 21 12 16 7"></polyline><line x1="21" y1="12" x2="9" y2="12"></line></svg>);
19
 
20
  export default function AdminLayout({ children, activeTab, setActiveTab, onNavigate }) {
21
+
22
  // Global Logout Handler
23
  const handleLogout = async () => {
24
  const { error } = await supabase.auth.signOut();
 
28
 
29
  return (
30
  <div style={{ height: '100vh', width: '100%', backgroundColor: '#020617', color: 'white', fontFamily: "'Montserrat', sans-serif", display: 'flex', position: 'relative', overflow: 'hidden' }}>
31
+
32
  {/* Background Effects */}
33
  <div style={{ position: 'fixed', top: 0, left: 0, right: 0, bottom: 0, zIndex: 0 }}>
34
  <div style={{ position: 'absolute', borderRadius: '50%', filter: 'blur(80px)', opacity: 0.3, width: '400px', height: '400px', backgroundColor: '#EF4444', top: '-50px', left: '-100px' }}></div>
 
38
  {/* Sidebar */}
39
  <aside style={{ width: '100px', padding: '2rem 0', display: 'flex', flexDirection: 'column', alignItems: 'center', zIndex: 10 }}>
40
  <div style={{ fontSize: '1.5rem', fontWeight: 'bold', color: '#EF4444', marginBottom: '2rem' }}>IRIS</div>
41
+ <nav style={{
42
+ display: 'flex', flexDirection: 'column', alignItems: 'center', gap: '1.5rem',
43
+ backgroundColor: 'rgba(239, 68, 68, 0.05)', border: '1px solid rgba(239, 68, 68, 0.2)',
44
+ borderRadius: '9999px', padding: '2rem 1rem'
45
  }}>
46
  <NavButton active={activeTab === 'dashboard'} onClick={() => setActiveTab('dashboard')} icon={<HomeIcon />} />
47
  <NavButton active={activeTab === 'job-management'} onClick={() => setActiveTab('job-management')} icon={<BriefcasePlusIcon />} />
48
  <NavButton active={activeTab === 'jobs'} onClick={() => setActiveTab('jobs')} icon={<BriefcaseIcon />} />
49
+ <NavButton active={activeTab === 'clusters'} onClick={() => setActiveTab('clusters')} icon={<ClustersIcon />} />
50
  <NavButton active={activeTab === 'messages'} onClick={() => setActiveTab('messages')} icon={<MessageSquareIcon />} />
51
  <NavButton active={activeTab === 'settings'} onClick={() => setActiveTab('settings')} icon={<SettingsIcon />} />
52
  </nav>
 
54
 
55
  {/* Main Content Area */}
56
  <div style={{ flex: 1, padding: '2rem', overflowY: 'auto', height: '100vh', boxSizing: 'border-box', position: 'relative', zIndex: 1 }}>
57
+
58
  {/* ✅ GLOBAL LOGOUT BUTTON - Updated Styles for Alignment */}
59
  <div style={{ position: 'absolute', top: '2rem', right: '2rem', zIndex: 50 }}>
60
+ <motion.button
61
+ onClick={handleLogout}
62
+ whileHover={{ scale: 1.05 }}
63
+ whileTap={{ scale: 0.95 }}
64
+ style={{
65
+ backgroundColor: '#EF4444',
66
+ color: 'white',
67
+ display: 'flex',
68
+ alignItems: 'center',
69
  justifyContent: 'center',
70
+ padding: '0.75rem 1.5rem',
71
+ borderRadius: '0.5rem',
72
+ fontWeight: 'bold',
73
+ cursor: 'pointer',
74
+ border: 'none',
75
  // Matches the visual weight of "Post New Job"
76
+ minWidth: '160px'
77
  }}
78
  >
79
  <LogoutIcon /> Logout
 
88
 
89
  // Helper Component for Navigation Buttons
90
  const NavButton = ({ active, onClick, icon }) => (
91
+ <motion.button
92
+ whileHover={{ scale: 1.1 }}
93
+ whileTap={{ scale: 0.9 }}
94
+ onClick={onClick}
95
  style={{ background: 'none', border: 'none', color: active ? '#EF4444' : '#d1d5db', cursor: 'pointer' }}
96
  >
97
  {icon}
src/components/Admin/TalentClusters.jsx ADDED
@@ -0,0 +1,496 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useState, useEffect } from 'react';
2
+ import { motion, AnimatePresence } from 'framer-motion';
3
+ import { supabase } from '../../supabaseClient';
4
+ import FullProfileOverlay from '../FullProfileOverlay';
5
+
6
+ // ─── Icons ───────────────────────────────────────────────────────────────────
7
+ const ClusterIcon = () => (
8
+ <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
9
+ <circle cx="12" cy="12" r="3" /><circle cx="4" cy="6" r="3" /><circle cx="20" cy="6" r="3" />
10
+ <circle cx="4" cy="18" r="3" /><circle cx="20" cy="18" r="3" />
11
+ <line x1="12" y1="9" x2="4" y2="7" /><line x1="12" y1="9" x2="20" y2="7" />
12
+ <line x1="12" y1="15" x2="4" y2="17" /><line x1="12" y1="15" x2="20" y2="17" />
13
+ </svg>
14
+ );
15
+
16
+ const UsersIcon = () => (
17
+ <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
18
+ <path d="M17 21v-2a4 4 0 0 0-4-4H5a4 4 0 0 0-4 4v2" /><circle cx="9" cy="7" r="4" />
19
+ <path d="M23 21v-2a4 4 0 0 0-3-3.87" /><path d="M16 3.13a4 4 0 0 1 0 7.75" />
20
+ </svg>
21
+ );
22
+
23
+ const SearchIcon = () => (
24
+ <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
25
+ <circle cx="11" cy="11" r="8" /><line x1="21" y1="21" x2="16.65" y2="16.65" />
26
+ </svg>
27
+ );
28
+
29
+ const ChevronDown = ({ open }) => (
30
+ <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2.5"
31
+ style={{ transform: open ? 'rotate(180deg)' : 'rotate(0deg)', transition: 'transform 0.3s ease' }}>
32
+ <polyline points="6 9 12 15 18 9" />
33
+ </svg>
34
+ );
35
+
36
+ const XIcon = () => (
37
+ <svg width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
38
+ <line x1="18" y1="6" x2="6" y2="18" /><line x1="6" y1="6" x2="18" y2="18" />
39
+ </svg>
40
+ );
41
+
42
// ─── Cluster colour palette ───────────────────────────────────────────────────
// Each entry is one visual theme for a cluster card: `accent` is the solid
// colour, `glow` a translucent background fill, `border` a translucent edge.
// Order matters: clusters are assigned a theme by their sorted index.
const CLUSTER_COLORS = [
  { accent: '#EF4444', glow: 'rgba(239,68,68,0.15)', border: 'rgba(239,68,68,0.3)' },
  { accent: '#8B5CF6', glow: 'rgba(139,92,246,0.15)', border: 'rgba(139,92,246,0.3)' },
  { accent: '#06B6D4', glow: 'rgba(6,182,212,0.15)', border: 'rgba(6,182,212,0.3)' },
  { accent: '#10B981', glow: 'rgba(16,185,129,0.15)', border: 'rgba(16,185,129,0.3)' },
  { accent: '#F59E0B', glow: 'rgba(245,158,11,0.15)', border: 'rgba(245,158,11,0.3)' },
  { accent: '#EC4899', glow: 'rgba(236,72,153,0.15)', border: 'rgba(236,72,153,0.3)' },
];

// Maps an arbitrary non-negative cluster index onto the palette, wrapping
// around when there are more clusters than themes.
const getColor = (idx) => CLUSTER_COLORS[idx % CLUSTER_COLORS.length];
53
+
54
+ // ─── Profile Card ─────────────────────────────────────────────────────────────
55
+ const ProfileCard = ({ profile, accent, onView }) => {
56
+ const [hovered, setHovered] = useState(false);
57
+ const skills = Array.isArray(profile.technical_skills)
58
+ ? profile.technical_skills.slice(0, 4)
59
+ : typeof profile.technical_skills === 'string'
60
+ ? profile.technical_skills.split(',').slice(0, 4).map(s => s.trim())
61
+ : [];
62
+
63
+ return (
64
+ <motion.div
65
+ onMouseEnter={() => setHovered(true)}
66
+ onMouseLeave={() => setHovered(false)}
67
+ whileHover={{ y: -4, scale: 1.01 }}
68
+ onClick={() => onView(profile)}
69
+ style={{
70
+ backgroundColor: hovered ? 'rgba(255,255,255,0.06)' : 'rgba(255,255,255,0.03)',
71
+ border: `1px solid ${hovered ? accent : 'rgba(255,255,255,0.08)'}`,
72
+ borderRadius: '12px',
73
+ padding: '1rem',
74
+ cursor: 'pointer',
75
+ transition: 'border-color 0.2s',
76
+ boxShadow: hovered ? `0 4px 20px ${accent}30` : 'none',
77
+ }}
78
+ >
79
+ <div style={{ display: 'flex', alignItems: 'center', gap: '0.75rem', marginBottom: '0.6rem' }}>
80
+ <img
81
+ src={profile.avatar_url || `https://ui-avatars.com/api/?name=${encodeURIComponent(profile.full_name || 'User')}&background=random&size=48`}
82
+ alt={profile.full_name}
83
+ style={{ width: 40, height: 40, borderRadius: '50%', objectFit: 'cover', border: `2px solid ${accent}55` }}
84
+ />
85
+ <div>
86
+ <p style={{ fontWeight: '700', color: '#fff', fontSize: '0.9rem', marginBottom: 2 }}>{profile.full_name || 'Unknown'}</p>
87
+ <p style={{ fontSize: '0.75rem', color: '#94a3b8' }}>{profile.headline || profile.role || '—'}</p>
88
+ </div>
89
+ </div>
90
+
91
+ <p style={{ fontSize: '0.75rem', color: '#64748b', marginBottom: '0.5rem' }}>
92
+ {profile.experience_years ? `${profile.experience_years} yrs exp` : 'No experience listed'}
93
+ </p>
94
+
95
+ {skills.length > 0 && (
96
+ <div style={{ display: 'flex', flexWrap: 'wrap', gap: '0.3rem' }}>
97
+ {skills.map((s, i) => (
98
+ <span key={i} style={{
99
+ fontSize: '0.7rem', padding: '2px 8px', borderRadius: '4px',
100
+ backgroundColor: `${accent}20`, color: accent,
101
+ border: `1px solid ${accent}40`
102
+ }}>{s}</span>
103
+ ))}
104
+ </div>
105
+ )}
106
+ </motion.div>
107
+ );
108
+ };
109
+
110
+ // ─── Cluster Card ─────────────────────────────────────────────────────────────
111
+ const ClusterCard = ({ label, profiles, colorIdx, searchQuery, onViewProfile }) => {
112
+ const [expanded, setExpanded] = useState(true);
113
+ const color = getColor(colorIdx);
114
+
115
+ const filtered = profiles.filter(p => {
116
+ const q = searchQuery.toLowerCase();
117
+ return (
118
+ (p.full_name || '').toLowerCase().includes(q) ||
119
+ (p.headline || '').toLowerCase().includes(q) ||
120
+ (p.role || '').toLowerCase().includes(q)
121
+ );
122
+ });
123
+
124
+ if (searchQuery && filtered.length === 0) return null;
125
+
126
+ return (
127
+ <motion.div
128
+ initial={{ opacity: 0, y: 20 }}
129
+ animate={{ opacity: 1, y: 0 }}
130
+ style={{
131
+ backgroundColor: color.glow,
132
+ border: `1px solid ${color.border}`,
133
+ borderRadius: '16px',
134
+ overflow: 'hidden',
135
+ marginBottom: '1.5rem',
136
+ }}
137
+ >
138
+ {/* Header */}
139
+ <button
140
+ onClick={() => setExpanded(e => !e)}
141
+ style={{
142
+ width: '100%', background: 'none', border: 'none', cursor: 'pointer',
143
+ padding: '1.25rem 1.5rem',
144
+ display: 'flex', alignItems: 'center', justifyContent: 'space-between',
145
+ color: '#fff',
146
+ }}
147
+ >
148
+ <div style={{ display: 'flex', alignItems: 'center', gap: '0.75rem' }}>
149
+ <div style={{
150
+ width: 36, height: 36, borderRadius: '10px',
151
+ backgroundColor: `${color.accent}22`, display: 'flex', alignItems: 'center', justifyContent: 'center',
152
+ border: `1px solid ${color.accent}55`
153
+ }}>
154
+ <ClusterIcon style={{ color: color.accent }} />
155
+ </div>
156
+ <div style={{ textAlign: 'left' }}>
157
+ <h3 style={{ fontSize: '1.05rem', fontWeight: '700', color: '#fff', margin: 0 }}>{label}</h3>
158
+ <div style={{ display: 'flex', alignItems: 'center', gap: '4px', color: '#94a3b8', fontSize: '0.8rem', marginTop: 2 }}>
159
+ <UsersIcon />
160
+ <span>{filtered.length} {filtered.length === 1 ? 'profile' : 'profiles'}</span>
161
+ </div>
162
+ </div>
163
+ </div>
164
+ <div style={{ color: color.accent }}>
165
+ <ChevronDown open={expanded} />
166
+ </div>
167
+ </button>
168
+
169
+ {/* Body */}
170
+ <AnimatePresence initial={false}>
171
+ {expanded && (
172
+ <motion.div
173
+ key="body"
174
+ initial={{ height: 0, opacity: 0 }}
175
+ animate={{ height: 'auto', opacity: 1 }}
176
+ exit={{ height: 0, opacity: 0 }}
177
+ transition={{ duration: 0.3 }}
178
+ style={{ overflow: 'hidden' }}
179
+ >
180
+ <div style={{
181
+ padding: '0 1.5rem 1.5rem',
182
+ display: 'grid',
183
+ gridTemplateColumns: 'repeat(auto-fill, minmax(220px, 1fr))',
184
+ gap: '0.75rem'
185
+ }}>
186
+ {filtered.map(p => (
187
+ <ProfileCard
188
+ key={p.id}
189
+ profile={p}
190
+ accent={color.accent}
191
+ onView={onViewProfile}
192
+ />
193
+ ))}
194
+ </div>
195
+ </motion.div>
196
+ )}
197
+ </AnimatePresence>
198
+ </motion.div>
199
+ );
200
+ };
201
+
202
// ─── Profile Detail Modal ─────────────────────────────────────────────────────
// Full-detail overlay for a single profile. Clicking the backdrop or the X
// button calls `onClose`; clicks inside the card are swallowed.
// NOTE(review): the caller (TalentClusters) already wraps this component in
// its own <AnimatePresence>, so the inner one below is likely redundant —
// confirm which layer should own the exit animation.
const ProfileModal = ({ profile, onClose }) => {
  if (!profile) return null;
  // `technical_skills` may be an array or a comma-separated string.
  const skills = Array.isArray(profile.technical_skills)
    ? profile.technical_skills
    : typeof profile.technical_skills === 'string'
      ? profile.technical_skills.split(',').map(s => s.trim())
      : [];

  return (
    <AnimatePresence>
      {/* Backdrop — dims/blur the page; click anywhere outside to dismiss. */}
      <motion.div
        initial={{ opacity: 0 }}
        animate={{ opacity: 1 }}
        exit={{ opacity: 0 }}
        onClick={onClose}
        style={{
          position: 'fixed', inset: 0, backgroundColor: 'rgba(0,0,0,0.7)',
          backdropFilter: 'blur(6px)', zIndex: 100, display: 'flex',
          alignItems: 'center', justifyContent: 'center', padding: '1rem'
        }}
      >
        {/* Card — stopPropagation keeps inside-clicks from closing the modal. */}
        <motion.div
          initial={{ scale: 0.9, opacity: 0 }}
          animate={{ scale: 1, opacity: 1 }}
          exit={{ scale: 0.9, opacity: 0 }}
          onClick={e => e.stopPropagation()}
          style={{
            backgroundColor: '#0f172a',
            backgroundImage: `
              radial-gradient(at 0% 0%, rgba(139,92,246,0.2) 0px, transparent 50%),
              radial-gradient(at 100% 100%, rgba(239,68,68,0.2) 0px, transparent 50%)
            `,
            border: '1px solid rgba(255,255,255,0.1)',
            borderRadius: '20px',
            width: '100%', maxWidth: '540px',
            maxHeight: '80vh', overflowY: 'auto',
            boxShadow: '0 25px 50px rgba(0,0,0,0.5)',
            padding: '2rem',
          }}
        >
          {/* Header row: avatar, name/headline, close (X) button. */}
          <div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', marginBottom: '1.5rem' }}>
            <div style={{ display: 'flex', alignItems: 'center', gap: '1rem' }}>
              <img
                src={profile.avatar_url || `https://ui-avatars.com/api/?name=${encodeURIComponent(profile.full_name || 'User')}&background=random&size=80`}
                alt={profile.full_name}
                style={{ width: 56, height: 56, borderRadius: '50%', objectFit: 'cover', border: '2px solid rgba(239,68,68,0.4)' }}
              />
              <div>
                <h2 style={{ fontSize: '1.4rem', fontWeight: '800', color: '#fff', margin: 0 }}>{profile.full_name}</h2>
                <p style={{ color: '#94a3b8', fontSize: '0.85rem', margin: 0 }}>{profile.headline || profile.role || '—'}</p>
              </div>
            </div>
            <button onClick={onClose} style={{ background: 'none', border: 'none', color: '#64748b', cursor: 'pointer' }}>
              <XIcon />
            </button>
          </div>

          {/* Stats Row — note "Email" deliberately shows only the local part
              (text before the @) of the address. */}
          <div style={{ display: 'grid', gridTemplateColumns: 'repeat(3, 1fr)', gap: '0.75rem', marginBottom: '1.5rem' }}>
            {[
              { label: 'Experience', value: profile.experience_years ? `${profile.experience_years} yrs` : '—' },
              { label: 'Cluster', value: profile.cluster_label || '—' },
              { label: 'Email', value: profile.email ? profile.email.split('@')[0] : '—' },
            ].map(({ label, value }) => (
              <div key={label} style={{
                backgroundColor: 'rgba(255,255,255,0.05)', borderRadius: '10px',
                padding: '0.75rem', border: '1px solid rgba(255,255,255,0.08)'
              }}>
                <p style={{ fontSize: '0.7rem', color: '#64748b', textTransform: 'uppercase', letterSpacing: '0.05em', marginBottom: 4 }}>{label}</p>
                <p style={{ fontSize: '0.85rem', fontWeight: '600', color: '#e2e8f0', wordBreak: 'break-all' }}>{value}</p>
              </div>
            ))}
          </div>

          {/* Summary — only rendered when present. */}
          {profile.summary && (
            <div style={{ marginBottom: '1.5rem' }}>
              <h4 style={{ fontSize: '0.85rem', color: '#94a3b8', fontWeight: '600', marginBottom: '0.5rem', textTransform: 'uppercase', letterSpacing: '0.05em' }}>Summary</h4>
              <p style={{ fontSize: '0.9rem', lineHeight: '1.6', color: '#cbd5e1', backgroundColor: 'rgba(255,255,255,0.04)', padding: '0.75rem', borderRadius: '8px', border: '1px solid rgba(255,255,255,0.05)' }}>
                {profile.summary}
              </p>
            </div>
          )}

          {/* Skills — full (uncapped) list, unlike the 4-chip card preview. */}
          {skills.length > 0 && (
            <div style={{ marginBottom: '1.5rem' }}>
              <h4 style={{ fontSize: '0.85rem', color: '#94a3b8', fontWeight: '600', marginBottom: '0.5rem', textTransform: 'uppercase', letterSpacing: '0.05em' }}>Technical Skills</h4>
              <div style={{ display: 'flex', flexWrap: 'wrap', gap: '0.4rem' }}>
                {skills.map((s, i) => (
                  <span key={i} style={{
                    fontSize: '0.8rem', padding: '4px 10px', borderRadius: '6px',
                    backgroundColor: 'rgba(239,68,68,0.1)', color: '#EF4444',
                    border: '1px solid rgba(239,68,68,0.2)'
                  }}>{s}</span>
                ))}
              </div>
            </div>
          )}

          {/* Education — may be a string or structured data; non-strings are
              JSON.stringify'd as-is (raw JSON will be shown to the admin). */}
          {profile.education && (
            <div>
              <h4 style={{ fontSize: '0.85rem', color: '#94a3b8', fontWeight: '600', marginBottom: '0.5rem', textTransform: 'uppercase', letterSpacing: '0.05em' }}>Education</h4>
              <p style={{ fontSize: '0.85rem', color: '#cbd5e1' }}>
                {typeof profile.education === 'string' ? profile.education : JSON.stringify(profile.education)}
              </p>
            </div>
          )}
        </motion.div>
      </motion.div>
    </AnimatePresence>
  );
};
318
+
319
+ // ─── MAIN PAGE ────────────────────────────────────────────────────────────────
320
+ export default function TalentClusters() {
321
+ const [clusters, setClusters] = useState({}); // { labelName: [profiles] }
322
+ const [isLoading, setIsLoading] = useState(true);
323
+ const [searchQuery, setSearchQuery] = useState('');
324
+ const [selectedProfile, setSelectedProfile] = useState(null);
325
+ const [error, setError] = useState(null);
326
+
327
+ useEffect(() => {
328
+ fetchClusters();
329
+ }, []);
330
+
331
+ const fetchClusters = async () => {
332
+ setIsLoading(true);
333
+ setError(null);
334
+ try {
335
+ const { data, error } = await supabase
336
+ .from('profiles')
337
+ .select('id, full_name, email, avatar_url, headline, role, experience_years, technical_skills, summary, education, cluster_label')
338
+ .not('cluster_label', 'is', null);
339
+
340
+ if (error) throw error;
341
+
342
+ // Group by cluster_label
343
+ const grouped = {};
344
+ data.forEach(profile => {
345
+ const label = profile.cluster_label || 'Uncategorized';
346
+ if (!grouped[label]) grouped[label] = [];
347
+ grouped[label].push(profile);
348
+ });
349
+
350
+ setClusters(grouped);
351
+ } catch (err) {
352
+ console.error('Failed to fetch clusters:', err);
353
+ setError('Failed to load talent clusters. Please try again.');
354
+ } finally {
355
+ setIsLoading(false);
356
+ }
357
+ };
358
+
359
+ const clusterEntries = Object.entries(clusters).sort((a, b) => b[1].length - a[1].length);
360
+ const totalProfiles = Object.values(clusters).reduce((s, arr) => s + arr.length, 0);
361
+
362
+ return (
363
+ <div style={{ paddingBottom: '4rem' }}>
364
+ <style>{`
365
+ .hide-scrollbar::-webkit-scrollbar { display: none; }
366
+ .hide-scrollbar { -ms-overflow-style: none; scrollbar-width: none; }
367
+ @keyframes spin { 100% { transform: rotate(360deg); } }
368
+ @keyframes pulse-dot { 0%,100% { opacity: 1; } 50% { opacity: 0.3; } }
369
+ `}</style>
370
+
371
+ {/* Header */}
372
+ <header style={{ marginBottom: '2rem' }}>
373
+ <div style={{ display: 'flex', alignItems: 'center', gap: '0.75rem', marginBottom: '0.5rem' }}>
374
+ <div style={{ color: '#EF4444' }}><ClusterIcon /></div>
375
+ <h1 style={{ fontSize: '1.875rem', fontWeight: 'bold', margin: 0 }}>Talent Clusters</h1>
376
+ </div>
377
+ <p style={{ color: '#64748b', fontSize: '0.9rem' }}>
378
+ AI-grouped candidate profiles based on skills and experience similarity.
379
+ </p>
380
+ </header>
381
+
382
+ {/* Stats Bar */}
383
+ <div style={{ display: 'flex', gap: '1rem', marginBottom: '2rem', flexWrap: 'wrap' }}>
384
+ {[
385
+ { label: 'Total Clusters', value: clusterEntries.length, color: '#EF4444' },
386
+ { label: 'Total Profiles', value: totalProfiles, color: '#8B5CF6' },
387
+ { label: 'Avg. Cluster Size', value: clusterEntries.length ? Math.round(totalProfiles / clusterEntries.length) : 0, color: '#06B6D4' },
388
+ ].map(({ label, value, color }) => (
389
+ <div key={label} style={{
390
+ flex: 1, minWidth: 140,
391
+ backgroundColor: 'rgba(255,255,255,0.03)',
392
+ border: '1px solid rgba(255,255,255,0.08)',
393
+ borderRadius: '12px', padding: '1rem 1.25rem',
394
+ }}>
395
+ <p style={{ fontSize: '0.75rem', color: '#64748b', textTransform: 'uppercase', letterSpacing: '0.05em', marginBottom: 4 }}>{label}</p>
396
+ <p style={{ fontSize: '1.8rem', fontWeight: '800', color, margin: 0, lineHeight: 1 }}>{isLoading ? '—' : value}</p>
397
+ </div>
398
+ ))}
399
+ </div>
400
+
401
+ {/* Search + Refresh */}
402
+ <div style={{ display: 'flex', gap: '0.75rem', marginBottom: '2rem', alignItems: 'center' }}>
403
+ <div style={{ position: 'relative', flexGrow: 1 }}>
404
+ <div style={{ position: 'absolute', left: 12, top: '50%', transform: 'translateY(-50%)', color: '#64748b' }}>
405
+ <SearchIcon />
406
+ </div>
407
+ <input
408
+ type="text"
409
+ placeholder="Search by name, role, or headline..."
410
+ value={searchQuery}
411
+ onChange={e => setSearchQuery(e.target.value)}
412
+ style={{
413
+ width: '100%', padding: '0.75rem 0.75rem 0.75rem 2.25rem',
414
+ borderRadius: '0.5rem', border: '1px solid rgba(239,68,68,0.3)',
415
+ backgroundColor: 'rgba(255,255,255,0.04)', color: 'white',
416
+ fontSize: '0.9rem', outline: 'none', boxSizing: 'border-box'
417
+ }}
418
+ />
419
+ </div>
420
+ <motion.button
421
+ onClick={fetchClusters}
422
+ whileHover={{ scale: 1.04 }}
423
+ whileTap={{ scale: 0.96 }}
424
+ style={{
425
+ backgroundColor: 'rgba(239,68,68,0.15)', border: '1px solid rgba(239,68,68,0.4)',
426
+ color: '#EF4444', padding: '0.75rem 1.25rem', borderRadius: '0.5rem',
427
+ cursor: 'pointer', fontWeight: '600', fontSize: '0.85rem', whiteSpace: 'nowrap'
428
+ }}
429
+ >
430
+ ↻ Refresh
431
+ </motion.button>
432
+ </div>
433
+
434
+ {/* Content */}
435
+ {isLoading ? (
436
+ <div style={{ display: 'flex', flexDirection: 'column', alignItems: 'center', justifyContent: 'center', height: '300px', gap: '1rem' }}>
437
+ <div style={{
438
+ width: 40, height: 40, border: '3px solid rgba(239,68,68,0.2)',
439
+ borderTopColor: '#EF4444', borderRadius: '50%',
440
+ animation: 'spin 0.8s linear infinite'
441
+ }} />
442
+ <p style={{ color: '#64748b' }}>Loading talent clusters…</p>
443
+ </div>
444
+ ) : error ? (
445
+ <div style={{ textAlign: 'center', padding: '3rem', color: '#EF4444' }}>
446
+ <p>{error}</p>
447
+ <button onClick={fetchClusters} style={{ marginTop: '1rem', backgroundColor: '#EF4444', color: 'white', border: 'none', padding: '0.5rem 1.5rem', borderRadius: '6px', cursor: 'pointer', fontWeight: '600' }}>
448
+ Retry
449
+ </button>
450
+ </div>
451
+ ) : clusterEntries.length === 0 ? (
452
+ <div style={{ textAlign: 'center', padding: '4rem', color: '#64748b' }}>
453
+ <ClusterIcon />
454
+ <p style={{ marginTop: '1rem' }}>No clusters found. Run the clustering pipeline first.</p>
455
+ </div>
456
+ ) : (
457
+ <>
458
+ {/* Cluster grid legend */}
459
+ <div style={{ display: 'flex', flexWrap: 'wrap', gap: '0.5rem', marginBottom: '1.5rem' }}>
460
+ {clusterEntries.map(([label, profiles], idx) => {
461
+ const color = getColor(idx);
462
+ return (
463
+ <span key={label} style={{
464
+ fontSize: '0.78rem', padding: '4px 12px', borderRadius: '99px',
465
+ backgroundColor: `${color.accent}18`, color: color.accent,
466
+ border: `1px solid ${color.accent}44`, fontWeight: '600'
467
+ }}>
468
+ {label} ({profiles.length})
469
+ </span>
470
+ );
471
+ })}
472
+ </div>
473
+
474
+ {/* Cluster cards */}
475
+ {clusterEntries.map(([label, profiles], idx) => (
476
+ <ClusterCard
477
+ key={label}
478
+ label={label}
479
+ profiles={profiles}
480
+ colorIdx={idx}
481
+ searchQuery={searchQuery}
482
+ onViewProfile={setSelectedProfile}
483
+ />
484
+ ))}
485
+ </>
486
+ )}
487
+
488
+ {/* Profile modal */}
489
+ <AnimatePresence>
490
+ {selectedProfile && (
491
+ <ProfileModal profile={selectedProfile} onClose={() => setSelectedProfile(null)} />
492
+ )}
493
+ </AnimatePresence>
494
+ </div>
495
+ );
496
+ }
src/components/JobListings.jsx CHANGED
@@ -1,18 +1,18 @@
1
  import React, { useState, useEffect } from 'react';
2
  import { motion, AnimatePresence } from 'framer-motion';
3
- import { supabase } from '../supabaseClient';
4
- import { SearchIcon } from './Icons';
5
- import JobDetail from './JobDetail';
6
- import ApplyModel from './ApplyModel';
7
- import JobCard from './JobCard';
8
  import VerificationModal from './VerificationModal'; // ✅ Import the new modal
9
 
10
  export default function JobListings({ searchQuery, setSearchQuery, isSearching, filteredJobListings }) {
11
-
12
  const [selectedJob, setSelectedJob] = useState(null);
13
  const [appliedJobIds, setAppliedJobIds] = useState(new Set());
14
- const [applying, setApplying] = useState(null);
15
-
16
  // State for the Apply Modal
17
  const [jobToApply, setJobToApply] = useState(null);
18
 
@@ -28,7 +28,7 @@ export default function JobListings({ searchQuery, setSearchQuery, isSearching,
28
  .from('applications')
29
  .select('job_id')
30
  .eq('user_id', user.id);
31
-
32
  if (data) {
33
  setAppliedJobIds(new Set(data.map(app => app.job_id)));
34
  }
@@ -40,7 +40,7 @@ export default function JobListings({ searchQuery, setSearchQuery, isSearching,
40
  // 2. Open Apply Modal
41
  const initiateApply = (jobId) => {
42
  const job = filteredJobListings.find(j => j.id === jobId);
43
- if(job) {
44
  setJobToApply(job);
45
  }
46
  };
@@ -48,12 +48,12 @@ export default function JobListings({ searchQuery, setSearchQuery, isSearching,
48
  // 3. Submit Application (With Verification Gatekeeper)
49
  const handleFinalSubmit = async (formData) => {
50
  if (!jobToApply) return;
51
-
52
  setApplying(jobToApply.id);
53
-
54
  try {
55
  const { data: { user } } = await supabase.auth.getUser();
56
-
57
  if (!user) {
58
  alert("Please log in to apply.");
59
  return;
@@ -69,30 +69,30 @@ export default function JobListings({ searchQuery, setSearchQuery, isSearching,
69
  if (profileError) throw profileError;
70
 
71
  // If NOT verified, stop the application and show modal
72
- if (!profile.is_phone_verified) {
73
- setApplying(null); // Stop loading spinner
74
- setJobToApply(null); // Close application form
75
- setShowVerificationModal(true); // Open Verification Modal
76
- return; // 🛑 Stop execution here
77
- }
78
 
79
  // --- ✅ IF VERIFIED: Proceed with Application ---
80
  const { error } = await supabase
81
  .from('applications')
82
- .insert([{
83
- job_id: jobToApply.id,
84
  user_id: user.id,
85
  status: 'Pending',
86
- resume_url: formData.resume_url,
87
- cover_letter: formData.cover_letter
88
  }]);
89
 
90
  if (error) throw error;
91
 
92
  setAppliedJobIds(prev => new Set(prev).add(jobToApply.id));
93
- alert("Application submitted successfully!");
94
-
95
- setJobToApply(null);
96
 
97
  } catch (error) {
98
  console.error("Error applying:", error.message);
@@ -144,16 +144,16 @@ export default function JobListings({ searchQuery, setSearchQuery, isSearching,
144
  <input type="text" value={searchQuery} onChange={(e) => setSearchQuery(e.target.value)} placeholder="Search by job title..." style={{ width: '100%', padding: '0.75rem 1rem 0.75rem 2.5rem', borderRadius: '0.5rem', border: '1px solid rgba(251, 191, 36, 0.3)', backgroundColor: 'rgba(255,255,255,0.1)', color: 'white' }} />
145
  </div>
146
  </div>
147
-
148
  {/* Job Grid */}
149
  <motion.main layout style={{ display: 'grid', gridTemplateColumns: 'repeat(auto-fit, minmax(300px, 1fr))', gap: '2rem' }}>
150
  <AnimatePresence>
151
  {filteredJobListings.length > 0 ? (
152
  filteredJobListings.map((job) => (
153
  <motion.div key={job.id} layout initial={{ opacity: 0, scale: 0.8 }} animate={{ opacity: 1, scale: 1 }} exit={{ opacity: 0, scale: 0.8 }} transition={{ duration: 0.2 }}>
154
- <JobCard
155
- {...job}
156
- onViewDetails={() => setSelectedJob(job)}
157
  onApply={() => initiateApply(job.id)}
158
  onWithdraw={() => handleWithdraw(job.id)}
159
  isApplied={appliedJobIds.has(job.id)}
@@ -162,16 +162,16 @@ export default function JobListings({ searchQuery, setSearchQuery, isSearching,
162
  </motion.div>
163
  ))
164
  ) : (
165
- <motion.p initial={{opacity: 0}} animate={{opacity: 1}} style={{ color: '#d1d5db' }}>No jobs found.</motion.p>
166
  )}
167
  </AnimatePresence>
168
  </motion.main>
169
 
170
  {/* Job Detail Modal */}
171
  {selectedJob && (
172
- <JobDetail
173
- job={selectedJob}
174
- onClose={() => setSelectedJob(null)}
175
  onApply={() => initiateApply(selectedJob.id)}
176
  isApplied={appliedJobIds.has(selectedJob.id)}
177
  isApplying={applying === selectedJob.id}
@@ -180,7 +180,7 @@ export default function JobListings({ searchQuery, setSearchQuery, isSearching,
180
 
181
  {/* Apply Form Modal */}
182
  {jobToApply && (
183
- <ApplyModel
184
  job={jobToApply}
185
  isSubmitting={applying === jobToApply.id}
186
  onClose={() => setJobToApply(null)}
@@ -190,7 +190,7 @@ export default function JobListings({ searchQuery, setSearchQuery, isSearching,
190
 
191
  {/* ✅ OTP Verification Modal */}
192
  {showVerificationModal && (
193
- <VerificationModal
194
  onClose={() => setShowVerificationModal(false)}
195
  onVerified={() => {
196
  setShowVerificationModal(false);
 
1
  import React, { useState, useEffect } from 'react';
2
  import { motion, AnimatePresence } from 'framer-motion';
3
+ import { supabase } from '../supabaseClient';
4
+ import { SearchIcon } from './Icons';
5
+ import JobDetail from './JobDetail';
6
+ import ApplyModel from './ApplyModel';
7
+ import JobCard from './JobCard';
8
  import VerificationModal from './VerificationModal'; // ✅ Import the new modal
9
 
10
  export default function JobListings({ searchQuery, setSearchQuery, isSearching, filteredJobListings }) {
11
+
12
  const [selectedJob, setSelectedJob] = useState(null);
13
  const [appliedJobIds, setAppliedJobIds] = useState(new Set());
14
+ const [applying, setApplying] = useState(null);
15
+
16
  // State for the Apply Modal
17
  const [jobToApply, setJobToApply] = useState(null);
18
 
 
28
  .from('applications')
29
  .select('job_id')
30
  .eq('user_id', user.id);
31
+
32
  if (data) {
33
  setAppliedJobIds(new Set(data.map(app => app.job_id)));
34
  }
 
40
  // 2. Open Apply Modal
41
  const initiateApply = (jobId) => {
42
  const job = filteredJobListings.find(j => j.id === jobId);
43
+ if (job) {
44
  setJobToApply(job);
45
  }
46
  };
 
48
  // 3. Submit Application (With Verification Gatekeeper)
49
  const handleFinalSubmit = async (formData) => {
50
  if (!jobToApply) return;
51
+
52
  setApplying(jobToApply.id);
53
+
54
  try {
55
  const { data: { user } } = await supabase.auth.getUser();
56
+
57
  if (!user) {
58
  alert("Please log in to apply.");
59
  return;
 
69
  if (profileError) throw profileError;
70
 
71
  // If NOT verified, stop the application and show modal
72
+ /** if (!profile.is_phone_verified) {
73
+ setApplying(null); // Stop loading spinner
74
+ setJobToApply(null); // Close application form
75
+ setShowVerificationModal(true); // Open Verification Modal
76
+ return; // 🛑 Stop execution here
77
+ } **/
78
 
79
  // --- ✅ IF VERIFIED: Proceed with Application ---
80
  const { error } = await supabase
81
  .from('applications')
82
+ .insert([{
83
+ job_id: jobToApply.id,
84
  user_id: user.id,
85
  status: 'Pending',
86
+ resume_url: formData.resume_url,
87
+ cover_letter: formData.cover_letter
88
  }]);
89
 
90
  if (error) throw error;
91
 
92
  setAppliedJobIds(prev => new Set(prev).add(jobToApply.id));
93
+ alert("Application submitted successfully!");
94
+
95
+ setJobToApply(null);
96
 
97
  } catch (error) {
98
  console.error("Error applying:", error.message);
 
144
  <input type="text" value={searchQuery} onChange={(e) => setSearchQuery(e.target.value)} placeholder="Search by job title..." style={{ width: '100%', padding: '0.75rem 1rem 0.75rem 2.5rem', borderRadius: '0.5rem', border: '1px solid rgba(251, 191, 36, 0.3)', backgroundColor: 'rgba(255,255,255,0.1)', color: 'white' }} />
145
  </div>
146
  </div>
147
+
148
  {/* Job Grid */}
149
  <motion.main layout style={{ display: 'grid', gridTemplateColumns: 'repeat(auto-fit, minmax(300px, 1fr))', gap: '2rem' }}>
150
  <AnimatePresence>
151
  {filteredJobListings.length > 0 ? (
152
  filteredJobListings.map((job) => (
153
  <motion.div key={job.id} layout initial={{ opacity: 0, scale: 0.8 }} animate={{ opacity: 1, scale: 1 }} exit={{ opacity: 0, scale: 0.8 }} transition={{ duration: 0.2 }}>
154
+ <JobCard
155
+ {...job}
156
+ onViewDetails={() => setSelectedJob(job)}
157
  onApply={() => initiateApply(job.id)}
158
  onWithdraw={() => handleWithdraw(job.id)}
159
  isApplied={appliedJobIds.has(job.id)}
 
162
  </motion.div>
163
  ))
164
  ) : (
165
+ <motion.p initial={{ opacity: 0 }} animate={{ opacity: 1 }} style={{ color: '#d1d5db' }}>No jobs found.</motion.p>
166
  )}
167
  </AnimatePresence>
168
  </motion.main>
169
 
170
  {/* Job Detail Modal */}
171
  {selectedJob && (
172
+ <JobDetail
173
+ job={selectedJob}
174
+ onClose={() => setSelectedJob(null)}
175
  onApply={() => initiateApply(selectedJob.id)}
176
  isApplied={appliedJobIds.has(selectedJob.id)}
177
  isApplying={applying === selectedJob.id}
 
180
 
181
  {/* Apply Form Modal */}
182
  {jobToApply && (
183
+ <ApplyModel
184
  job={jobToApply}
185
  isSubmitting={applying === jobToApply.id}
186
  onClose={() => setJobToApply(null)}
 
190
 
191
  {/* ✅ OTP Verification Modal */}
192
  {showVerificationModal && (
193
+ <VerificationModal
194
  onClose={() => setShowVerificationModal(false)}
195
  onVerified={() => {
196
  setShowVerificationModal(false);
src/pages/Admindashboard.jsx CHANGED
@@ -1,6 +1,6 @@
1
  import React, { useState } from 'react';
2
  import { motion, AnimatePresence } from 'framer-motion';
3
- import { supabase } from '../supabaseClient';
4
 
5
  // Import the new split modules
6
  import AdminLayout from '../components/admin/AdminLayout';
@@ -9,6 +9,7 @@ import AdminSortingPage from '../components/admin/AdminSortingPage';
9
  import AdminInterviewManagement from '../components/admin/AdminInterviewManagement';
10
  import AdminProfile from '../components/admin/AdminProfile';
11
  import JobPosting from './JobPosting'; // Import your existing JobPosting component
 
12
 
13
  export default function AdminDashboard({ onNavigate }) {
14
  const [activeTab, setActiveTab] = useState('dashboard');
@@ -18,23 +19,25 @@ export default function AdminDashboard({ onNavigate }) {
18
  switch (activeTab) {
19
  case 'dashboard':
20
  return <AdminSummary onNavigate={onNavigate} setIsModalOpen={setIsModalOpen} />;
21
- case 'jobs':
22
  return <AdminSortingPage />;
23
- case 'messages':
24
  return <AdminInterviewManagement />;
25
- case 'job-management':
26
  return <JobPosting />;
27
- case 'settings':
 
 
28
  return <AdminProfile onNavigate={onNavigate} />;
29
- default:
30
  return null;
31
  }
32
  };
33
 
34
- const contentVariants = {
35
- hidden: { opacity: 0, y: 10 },
36
- visible: { opacity: 1, y: 0 },
37
- exit: { opacity: 0, y: -10 }
38
  };
39
 
40
  return (
 
1
  import React, { useState } from 'react';
2
  import { motion, AnimatePresence } from 'framer-motion';
3
+ import { supabase } from '../supabaseClient';
4
 
5
  // Import the new split modules
6
  import AdminLayout from '../components/admin/AdminLayout';
 
9
  import AdminInterviewManagement from '../components/admin/AdminInterviewManagement';
10
  import AdminProfile from '../components/admin/AdminProfile';
11
  import JobPosting from './JobPosting'; // Import your existing JobPosting component
12
+ import TalentClusters from '../components/Admin/TalentClusters';
13
 
14
  export default function AdminDashboard({ onNavigate }) {
15
  const [activeTab, setActiveTab] = useState('dashboard');
 
19
  switch (activeTab) {
20
  case 'dashboard':
21
  return <AdminSummary onNavigate={onNavigate} setIsModalOpen={setIsModalOpen} />;
22
+ case 'jobs':
23
  return <AdminSortingPage />;
24
+ case 'messages':
25
  return <AdminInterviewManagement />;
26
+ case 'job-management':
27
  return <JobPosting />;
28
+ case 'clusters':
29
+ return <TalentClusters />;
30
+ case 'settings':
31
  return <AdminProfile onNavigate={onNavigate} />;
32
+ default:
33
  return null;
34
  }
35
  };
36
 
37
+ const contentVariants = {
38
+ hidden: { opacity: 0, y: 10 },
39
+ visible: { opacity: 1, y: 0 },
40
+ exit: { opacity: 0, y: -10 }
41
  };
42
 
43
  return (
src/pages/ApplicantProfile.jsx CHANGED
@@ -25,7 +25,7 @@ export default function ApplicantProfile({ onNavigate }) {
25
  try {
26
  // Get current user
27
  const { data: { user } } = await supabase.auth.getUser();
28
-
29
  if (user) {
30
  // Fetch Profile using maybeSingle() to avoid errors if empty
31
  const { data: profile, error } = await supabase
@@ -44,7 +44,7 @@ export default function ApplicantProfile({ onNavigate }) {
44
  setFormData(combinedData);
45
  setOriginalFormData(combinedData);
46
  if (profile.avatar_url) {
47
- setAvatarUrl(profile.avatar_url);
48
  }
49
  } else {
50
  // New user - Initialize with just email
@@ -90,7 +90,7 @@ export default function ApplicantProfile({ onNavigate }) {
90
  const newValue = type === 'checkbox' ? checked : value;
91
  setFormData(prev => ({ ...prev, [name]: newValue }));
92
  };
93
-
94
  const handleAddExperience = () => {
95
  const newExperience = { id: Date.now(), company: '', role: '', years: '' };
96
  setFormData(prev => ({
@@ -98,7 +98,7 @@ export default function ApplicantProfile({ onNavigate }) {
98
  work_experience: [...(prev.work_experience || []), newExperience]
99
  }));
100
  };
101
-
102
  const handleExperienceChange = (index, e) => {
103
  const { name, value } = e.target;
104
  const updatedExperience = [...(formData.work_experience || [])];
@@ -110,7 +110,7 @@ export default function ApplicantProfile({ onNavigate }) {
110
  if (!isEditing || !e.target.files || e.target.files.length === 0) return;
111
  setResumeFile(e.target.files[0]);
112
  };
113
-
114
  const handleAvatarFileChange = (e) => {
115
  if (!isEditing || !e.target.files || e.target.files.length === 0) return;
116
  const file = e.target.files[0];
@@ -135,12 +135,23 @@ export default function ApplicantProfile({ onNavigate }) {
135
  }
136
 
137
  if (resumeFile) {
 
 
 
 
 
 
 
 
 
 
 
138
  const filePath = `${user.id}/${Date.now()}_${resumeFile.name}`;
139
  // Make sure your bucket is named 'resumes' (plural) or 'resume' (singular) to match your Supabase Storage
140
  await supabase.storage.from('resume').upload(filePath, resumeFile, { upsert: true });
141
  updates.resume_url = filePath;
142
  }
143
-
144
  const { error } = await supabase.from('profiles').upsert(updates);
145
  if (error) throw error;
146
 
 
25
  try {
26
  // Get current user
27
  const { data: { user } } = await supabase.auth.getUser();
28
+
29
  if (user) {
30
  // Fetch Profile using maybeSingle() to avoid errors if empty
31
  const { data: profile, error } = await supabase
 
44
  setFormData(combinedData);
45
  setOriginalFormData(combinedData);
46
  if (profile.avatar_url) {
47
+ setAvatarUrl(profile.avatar_url);
48
  }
49
  } else {
50
  // New user - Initialize with just email
 
90
  const newValue = type === 'checkbox' ? checked : value;
91
  setFormData(prev => ({ ...prev, [name]: newValue }));
92
  };
93
+
94
  const handleAddExperience = () => {
95
  const newExperience = { id: Date.now(), company: '', role: '', years: '' };
96
  setFormData(prev => ({
 
98
  work_experience: [...(prev.work_experience || []), newExperience]
99
  }));
100
  };
101
+
102
  const handleExperienceChange = (index, e) => {
103
  const { name, value } = e.target;
104
  const updatedExperience = [...(formData.work_experience || [])];
 
110
  if (!isEditing || !e.target.files || e.target.files.length === 0) return;
111
  setResumeFile(e.target.files[0]);
112
  };
113
+
114
  const handleAvatarFileChange = (e) => {
115
  if (!isEditing || !e.target.files || e.target.files.length === 0) return;
116
  const file = e.target.files[0];
 
135
  }
136
 
137
  if (resumeFile) {
138
+ // Delete old resume if it exists to prevent duplication
139
+ if (originalFormData?.resume_url) {
140
+ try {
141
+ const oldPath = originalFormData.resume_url;
142
+ const { error: removeError } = await supabase.storage.from('resume').remove([oldPath]);
143
+ if (removeError) console.warn("Could not delete old resume:", removeError.message);
144
+ } catch (e) {
145
+ console.warn("Exception during old resume removal:", e);
146
+ }
147
+ }
148
+
149
  const filePath = `${user.id}/${Date.now()}_${resumeFile.name}`;
150
  // Make sure your bucket is named 'resumes' (plural) or 'resume' (singular) to match your Supabase Storage
151
  await supabase.storage.from('resume').upload(filePath, resumeFile, { upsert: true });
152
  updates.resume_url = filePath;
153
  }
154
+
155
  const { error } = await supabase.from('profiles').upsert(updates);
156
  if (error) throw error;
157
 
system_architecture.txt ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # IRIS Detailed System Architecture
2
+
3
+ This document provides a comprehensive look at the IRIS architecture, broken down by functional layers and individual process steps.
4
+
5
+ ## Overall System Flow
6
+
7
+ This tiered diagram shows how data flows through the three main layers of the system.
8
+
9
+ ```mermaid
10
+ graph TD
11
+ subgraph "1. Ingestion & Preprocessing"
12
+ UC[User/Admin] -->|Upload| SS[Supabase Storage]
13
+ SS -->|Webhook| BE[FastAPI Backend]
14
+ BE -->|Download| PC[Text Cleaning]
15
+ PC -->|Anonymize| PA[PII Removal]
16
+ end
17
+
18
+ subgraph "2. NLP Processing Layer"
19
+ PA -->|Raw Text| EX[Gemini Extraction]
20
+ EX -->|JSON| DB[(Supabase DB)]
21
+ DB -->|Text Fields| EM[BGE-M3 Embedding]
22
+ EM -->|Vectors| DB
23
+ end
24
+
25
+ subgraph "3. Matching & AI Analysis"
26
+ DB -->|Job vs Resume| MS[Semantic Matching]
27
+ MS -->|Score| MG[Skill Gap Analysis]
28
+ MG -->|Insights| AI[Gemini Analysis]
29
+ AI -->|Final Report| UI[Admin Dashboard]
30
+ end
31
+ ```
32
+
33
+ ---
34
+
35
+ ## 1. Data Ingestion & Preprocessing
36
+ This layer ensures that incoming data is clean, secure, and ready for AI processing.
37
+
38
+ * **File Upload**: Resumes and Job Descriptions are stored securely in Supabase buckets.
39
+ * **Event Trigger**: Database Webhooks instantly notify the backend when a new file arrives.
40
+ * **Text Cleaning**: Standardizes encoding, removes special characters, and normalizes whitespace.
41
+ * **PII Anonymization**: Uses Regex and NLP patterns to detect and redact sensitive personal information (e.g., phone numbers and addresses) before deep processing.
42
+
43
+ ## 2. NLP Processing Pipeline
44
+ The "Intelligence" layer that understands the meaning behind the text.
45
+
46
+ * **Structured Extraction**: Google Gemini parses unstructured text into logical objects (Skills, Experience, Education).
47
+ * **Relational Storage**: Structured data is saved into dedicated PostgreSQL tables for rapid querying.
48
+ * **Vector Embedding**: The BGE-M3 model converts the candidate's profile and the job requirements into dense numerical vectors (embeddings) that capture their meaning.
49
+ * **Vector Search Index**: These vectors allow the system to find matches based on *meaning* rather than just keywords (e.g., matching "Software Engineer" with "Full Stack Developer").
50
+
51
+ ## 3. Matching & AI Analysis Layer
52
+ The decision-making layer that provides final value to the recruiter.
53
+
54
+ * **Semantic Scoring**: Scores a candidate against a job by measuring how close their embedding vectors are — the closer the vectors, the stronger the match.
55
+ * **Skill Gap Analysis**: Compares the extracted skill sets to identify exactly what is missing or where the candidate excels.
56
+ * **AI Insight Generation**: A second pass with Gemini generates a human-readable summary, custom strengths, and potential weaknesses.
57
+ * **Final Ranking**: Aggregates all scores into a prioritized list for the Admin dashboard.
58
+
59
+ ## Technology Stack
60
+
61
+ | Layer | Technologies |
62
+ | :--- | :--- |
63
+ | **Frontend** | React, Vite, Framer Motion, Lucide Icons |
64
+ | **Backend** | FastAPI, Python, SQLAlchemy/Supabase-py |
65
+ | **Data** | Supabase (Postgres), pgvector, Supabase Storage |
66
+ | **AI/ML** | Google Gemini (LLM), BGE-M3 (Embeddings), Sentence Transformers |