Spaces:
Sleeping
Sleeping
Muhammed Sameer commited on
Commit ·
ea9ca44
0
Parent(s):
Initial commit - Iris Full (under development)
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitignore +24 -0
- README.md +12 -0
- Supabase/.temp/cli-latest +1 -0
- Supabase/config.toml +66 -0
- Supabase/functions/_shared/cors.ts +4 -0
- Supabase/functions/generate-verification-token/.npmrc +3 -0
- Supabase/functions/generate-verification-token/deno.json +3 -0
- Supabase/functions/generate-verification-token/index.ts +60 -0
- Supabase/functions/initiate-admin-transfer/.npmrc +3 -0
- Supabase/functions/initiate-admin-transfer/deno.json +3 -0
- Supabase/functions/initiate-admin-transfer/index.ts +80 -0
- Supabase/functions/invite-first-admin/.npmrc +3 -0
- Supabase/functions/invite-first-admin/deno.json +3 -0
- Supabase/functions/invite-first-admin/index.ts +24 -0
- Supabase/functions/otp/.npmrc +3 -0
- Supabase/functions/otp/deno.json +3 -0
- Supabase/functions/otp/index.ts +137 -0
- Supabase/functions/send-interview-email/.npmrc +3 -0
- Supabase/functions/send-interview-email/deno.json +3 -0
- Supabase/functions/send-interview-email/index.ts +51 -0
- Supabase/functions/verify-domain/.npmrc +3 -0
- Supabase/functions/verify-domain/deno.json +3 -0
- Supabase/functions/verify-domain/index.ts +32 -0
- backend/.env +12 -0
- backend/.gitignore +26 -0
- backend/add_experience_to_embeddings.sql +5 -0
- backend/add_projects_to_profiles.sql +5 -0
- backend/api.py +157 -0
- backend/create_profile_embeddings.sql +32 -0
- backend/debug_payload.json +69 -0
- backend/debug_resume.txt +6 -0
- backend/requirements.txt +28 -0
- backend/src/__init__.py +0 -0
- backend/src/embeddings/__init__.py +0 -0
- backend/src/embeddings/debug_embedding_storage.py +62 -0
- backend/src/embeddings/job_embed.py +108 -0
- backend/src/embeddings/local_embedder.py +137 -0
- backend/src/embeddings/process_all_profiles.py +46 -0
- backend/src/embeddings/test_embedder.py +34 -0
- backend/src/extraction/__init__.py +0 -0
- backend/src/extraction/fallback_extractor.py +51 -0
- backend/src/extraction/job_extractor.py +181 -0
- backend/src/extraction/person_details_extraction_gemini.py +220 -0
- backend/src/extraction/test_regex.py +33 -0
- backend/src/ingestion/__init__.py +0 -0
- backend/src/ingestion/docx_reader.py +11 -0
- backend/src/ingestion/parser.py +21 -0
- backend/src/ingestion/pdf_reader.py +38 -0
- backend/src/matching/__init__.py +0 -0
- backend/src/matching/similarity.py +0 -0
.gitignore
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Logs
|
| 2 |
+
logs
|
| 3 |
+
*.log
|
| 4 |
+
npm-debug.log*
|
| 5 |
+
yarn-debug.log*
|
| 6 |
+
yarn-error.log*
|
| 7 |
+
pnpm-debug.log*
|
| 8 |
+
lerna-debug.log*
|
| 9 |
+
|
| 10 |
+
node_modules
|
| 11 |
+
dist
|
| 12 |
+
dist-ssr
|
| 13 |
+
*.local
|
| 14 |
+
|
| 15 |
+
# Editor directories and files
|
| 16 |
+
.vscode/*
|
| 17 |
+
!.vscode/extensions.json
|
| 18 |
+
.idea
|
| 19 |
+
.DS_Store
|
| 20 |
+
*.suo
|
| 21 |
+
*.ntvs*
|
| 22 |
+
*.njsproj
|
| 23 |
+
*.sln
|
| 24 |
+
*.sw?
|
README.md
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# React + Vite
|
| 2 |
+
|
| 3 |
+
This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
|
| 4 |
+
|
| 5 |
+
Currently, two official plugins are available:
|
| 6 |
+
|
| 7 |
+
- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Babel](https://babeljs.io/) for Fast Refresh
|
| 8 |
+
- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
|
| 9 |
+
|
| 10 |
+
## Expanding the ESLint configuration
|
| 11 |
+
|
| 12 |
+
If you are developing a production application, we recommend using TypeScript with type-aware lint rules enabled. Check out the [TS template](https://github.com/vitejs/vite/tree/main/packages/create-vite/template-react-ts) for information on how to integrate TypeScript and [`typescript-eslint`](https://typescript-eslint.io) in your project.
|
Supabase/.temp/cli-latest
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
v2.67.1
|
Supabase/config.toml
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
[functions.verify-domain]
|
| 3 |
+
enabled = true
|
| 4 |
+
verify_jwt = true
|
| 5 |
+
import_map = "./functions/verify-domain/deno.json"
|
| 6 |
+
# Uncomment to specify a custom file path to the entrypoint.
|
| 7 |
+
# Supported file extensions are: .ts, .js, .mjs, .jsx, .tsx
|
| 8 |
+
entrypoint = "./functions/verify-domain/index.ts"
|
| 9 |
+
# Specifies static files to be bundled with the function. Supports glob patterns.
|
| 10 |
+
# For example, if you want to serve static HTML pages in your function:
|
| 11 |
+
# static_files = [ "./functions/verify-domain/*.html" ]
|
| 12 |
+
|
| 13 |
+
[functions.invite-first-admin]
|
| 14 |
+
enabled = true
|
| 15 |
+
verify_jwt = true
|
| 16 |
+
import_map = "./functions/invite-first-admin/deno.json"
|
| 17 |
+
# Uncomment to specify a custom file path to the entrypoint.
|
| 18 |
+
# Supported file extensions are: .ts, .js, .mjs, .jsx, .tsx
|
| 19 |
+
entrypoint = "./functions/invite-first-admin/index.ts"
|
| 20 |
+
# Specifies static files to be bundled with the function. Supports glob patterns.
|
| 21 |
+
# For example, if you want to serve static HTML pages in your function:
|
| 22 |
+
# static_files = [ "./functions/invite-first-admin/*.html" ]
|
| 23 |
+
|
| 24 |
+
[functions.generate-verification-token]
|
| 25 |
+
enabled = true
|
| 26 |
+
verify_jwt = true
|
| 27 |
+
import_map = "./functions/generate-verification-token/deno.json"
|
| 28 |
+
# Uncomment to specify a custom file path to the entrypoint.
|
| 29 |
+
# Supported file extensions are: .ts, .js, .mjs, .jsx, .tsx
|
| 30 |
+
entrypoint = "./functions/generate-verification-token/index.ts"
|
| 31 |
+
# Specifies static files to be bundled with the function. Supports glob patterns.
|
| 32 |
+
# For example, if you want to serve static HTML pages in your function:
|
| 33 |
+
# static_files = [ "./functions/generate-verification-token/*.html" ]
|
| 34 |
+
|
| 35 |
+
[functions.initiate-admin-transfer]
|
| 36 |
+
enabled = true
|
| 37 |
+
verify_jwt = true
|
| 38 |
+
import_map = "./functions/initiate-admin-transfer/deno.json"
|
| 39 |
+
# Uncomment to specify a custom file path to the entrypoint.
|
| 40 |
+
# Supported file extensions are: .ts, .js, .mjs, .jsx, .tsx
|
| 41 |
+
entrypoint = "./functions/initiate-admin-transfer/index.ts"
|
| 42 |
+
# Specifies static files to be bundled with the function. Supports glob patterns.
|
| 43 |
+
# For example, if you want to serve static HTML pages in your function:
|
| 44 |
+
# static_files = [ "./functions/initiate-admin-transfer/*.html" ]
|
| 45 |
+
|
| 46 |
+
[functions.otp]
|
| 47 |
+
enabled = true
|
| 48 |
+
verify_jwt = true
|
| 49 |
+
import_map = "./functions/otp/deno.json"
|
| 50 |
+
# Uncomment to specify a custom file path to the entrypoint.
|
| 51 |
+
# Supported file extensions are: .ts, .js, .mjs, .jsx, .tsx
|
| 52 |
+
entrypoint = "./functions/otp/index.ts"
|
| 53 |
+
# Specifies static files to be bundled with the function. Supports glob patterns.
|
| 54 |
+
# For example, if you want to serve static HTML pages in your function:
|
| 55 |
+
# static_files = [ "./functions/otp/*.html" ]
|
| 56 |
+
|
| 57 |
+
[functions.send-interview-email]
|
| 58 |
+
enabled = true
|
| 59 |
+
verify_jwt = true
|
| 60 |
+
import_map = "./functions/send-interview-email/deno.json"
|
| 61 |
+
# Uncomment to specify a custom file path to the entrypoint.
|
| 62 |
+
# Supported file extensions are: .ts, .js, .mjs, .jsx, .tsx
|
| 63 |
+
entrypoint = "./functions/send-interview-email/index.ts"
|
| 64 |
+
# Specifies static files to be bundled with the function. Supports glob patterns.
|
| 65 |
+
# For example, if you want to serve static HTML pages in your function:
|
| 66 |
+
# static_files = [ "./functions/send-interview-email/*.html" ]
|
Supabase/functions/_shared/cors.ts
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
export const corsHeaders = {
|
| 2 |
+
'Access-Control-Allow-Origin': '*',
|
| 3 |
+
'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type',
|
| 4 |
+
}
|
Supabase/functions/generate-verification-token/.npmrc
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Configuration for private npm package dependencies
|
| 2 |
+
# For more information on using private registries with Edge Functions, see:
|
| 3 |
+
# https://supabase.com/docs/guides/functions/import-maps#importing-from-private-registries
|
Supabase/functions/generate-verification-token/deno.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"imports": {}
|
| 3 |
+
}
|
Supabase/functions/generate-verification-token/index.ts
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// supabase/functions/generate-verification-token/index.ts
|
| 2 |
+
|
| 3 |
+
import { serve } from 'https://deno.land/std@0.177.0/http/server.ts'
|
| 4 |
+
import { createClient } from 'https://esm.sh/@supabase/supabase-js@2.39.8'
|
| 5 |
+
import { corsHeaders } from '../_shared/cors.ts'
|
| 6 |
+
|
| 7 |
+
serve(async (req) => {
|
| 8 |
+
// This is the crucial block that handles the browser's preflight check
|
| 9 |
+
if (req.method === 'OPTIONS') {
|
| 10 |
+
return new Response('ok', { headers: corsHeaders })
|
| 11 |
+
}
|
| 12 |
+
|
| 13 |
+
try {
|
| 14 |
+
const { email } = await req.json();
|
| 15 |
+
const domain = email.split('@')[1];
|
| 16 |
+
if (!domain) {
|
| 17 |
+
throw new Error("Invalid email format.");
|
| 18 |
+
}
|
| 19 |
+
|
| 20 |
+
const supabaseAdmin = createClient(
|
| 21 |
+
Deno.env.get('SUPABASE_URL')!,
|
| 22 |
+
Deno.env.get('SUPABASE_SERVICE_ROLE_KEY')!
|
| 23 |
+
);
|
| 24 |
+
|
| 25 |
+
const { data: blockedDomain } = await supabaseAdmin
|
| 26 |
+
.from('blocked_domains')
|
| 27 |
+
.select('domain')
|
| 28 |
+
.eq('domain', domain)
|
| 29 |
+
.single();
|
| 30 |
+
|
| 31 |
+
if (blockedDomain) {
|
| 32 |
+
throw new Error("Please use a business email. Free email providers are not allowed.");
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
const { data, error } = await supabaseAdmin
|
| 36 |
+
.from('organizations')
|
| 37 |
+
.insert({
|
| 38 |
+
name: domain,
|
| 39 |
+
verified_domain: domain,
|
| 40 |
+
})
|
| 41 |
+
.select('verification_token')
|
| 42 |
+
.single();
|
| 43 |
+
|
| 44 |
+
if (error) throw error;
|
| 45 |
+
|
| 46 |
+
return new Response(JSON.stringify({
|
| 47 |
+
verification_token: data.verification_token,
|
| 48 |
+
domain: domain
|
| 49 |
+
}), {
|
| 50 |
+
headers: { ...corsHeaders, 'Content-Type': 'application/json' },
|
| 51 |
+
status: 200,
|
| 52 |
+
});
|
| 53 |
+
|
| 54 |
+
} catch (error) {
|
| 55 |
+
return new Response(JSON.stringify({ error: error.message }), {
|
| 56 |
+
headers: { ...corsHeaders, 'Content-Type': 'application/json' },
|
| 57 |
+
status: 400,
|
| 58 |
+
});
|
| 59 |
+
}
|
| 60 |
+
})
|
Supabase/functions/initiate-admin-transfer/.npmrc
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Configuration for private npm package dependencies
|
| 2 |
+
# For more information on using private registries with Edge Functions, see:
|
| 3 |
+
# https://supabase.com/docs/guides/functions/import-maps#importing-from-private-registries
|
Supabase/functions/initiate-admin-transfer/deno.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"imports": {}
|
| 3 |
+
}
|
Supabase/functions/initiate-admin-transfer/index.ts
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { createClient } from 'https://esm.sh/@supabase/supabase-js@2'
|
| 2 |
+
import { v4 as uuidv4 } from 'https://deno.land/std@0.106.0/uuid/mod.ts';
|
| 3 |
+
|
| 4 |
+
const corsHeaders = {
|
| 5 |
+
'Access-Control-Allow-Origin': '*',
|
| 6 |
+
'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type',
|
| 7 |
+
}
|
| 8 |
+
|
| 9 |
+
Deno.serve(async (req) => {
|
| 10 |
+
if (req.method === 'OPTIONS') {
|
| 11 |
+
return new Response('ok', { headers: corsHeaders })
|
| 12 |
+
}
|
| 13 |
+
|
| 14 |
+
try {
|
| 15 |
+
const { newAdminEmail } = await req.json();
|
| 16 |
+
if (!newAdminEmail) {
|
| 17 |
+
throw new Error("New admin's email is required.");
|
| 18 |
+
}
|
| 19 |
+
|
| 20 |
+
// Create an admin client to bypass RLS
|
| 21 |
+
const supabaseAdmin = createClient(
|
| 22 |
+
Deno.env.get('SUPABASE_URL')!,
|
| 23 |
+
Deno.env.get('SUPABASE_SERVICE_ROLE_KEY')!
|
| 24 |
+
);
|
| 25 |
+
|
| 26 |
+
// Get the current user from the request's auth token
|
| 27 |
+
const authHeader = req.headers.get('Authorization')!;
|
| 28 |
+
const jwt = authHeader.replace('Bearer ', '');
|
| 29 |
+
const { data: { user } } = await supabaseAdmin.auth.getUser(jwt);
|
| 30 |
+
if (!user) {
|
| 31 |
+
throw new Error("Could not identify the current user.");
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
// 1. Generate a secure, unique token for the transfer
|
| 35 |
+
const transferToken = uuidv4();
|
| 36 |
+
const expiryDate = new Date();
|
| 37 |
+
expiryDate.setHours(expiryDate.getHours() + 24); // Token is valid for 24 hours
|
| 38 |
+
|
| 39 |
+
// 2. Store the token and link it to the current user's company
|
| 40 |
+
// This assumes the 'companies' table has 'admin_transfer_token' and 'admin_transfer_expires_at' columns
|
| 41 |
+
const { data: profile, error: profileError } = await supabaseAdmin
|
| 42 |
+
.from('profiles').select('company_id').eq('id', user.id).single();
|
| 43 |
+
if (profileError || !profile) throw new Error("Could not find the user's company.");
|
| 44 |
+
|
| 45 |
+
const { error: updateError } = await supabaseAdmin
|
| 46 |
+
.from('companies')
|
| 47 |
+
.update({
|
| 48 |
+
admin_transfer_token: transferToken,
|
| 49 |
+
admin_transfer_expires_at: expiryDate.toISOString(),
|
| 50 |
+
})
|
| 51 |
+
.eq('id', profile.company_id);
|
| 52 |
+
if (updateError) throw new Error("Failed to store the transfer token.");
|
| 53 |
+
|
| 54 |
+
// 3. Send a magic link email to the new admin
|
| 55 |
+
// This link should point to a page in your app that handles the token verification
|
| 56 |
+
const transferUrl = `${Deno.env.get('SITE_URL')}/accept-admin-transfer?token=${transferToken}`;
|
| 57 |
+
|
| 58 |
+
const { error: emailError } = await supabaseAdmin.auth.admin.generateLink({
|
| 59 |
+
type: 'magiclink',
|
| 60 |
+
email: newAdminEmail,
|
| 61 |
+
options: {
|
| 62 |
+
redirectTo: transferUrl
|
| 63 |
+
}
|
| 64 |
+
});
|
| 65 |
+
|
| 66 |
+
if (emailError) {
|
| 67 |
+
throw new Error("Could not send invitation email.");
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
return new Response(JSON.stringify({ success: true, message: "Transfer invitation sent." }), {
|
| 71 |
+
headers: { ...corsHeaders, 'Content-Type': 'application/json' },
|
| 72 |
+
status: 200,
|
| 73 |
+
})
|
| 74 |
+
} catch (err) {
|
| 75 |
+
return new Response(JSON.stringify({ error: err.message }), {
|
| 76 |
+
headers: { ...corsHeaders, 'Content-Type': 'application/json' },
|
| 77 |
+
status: 400,
|
| 78 |
+
})
|
| 79 |
+
}
|
| 80 |
+
})
|
Supabase/functions/invite-first-admin/.npmrc
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Configuration for private npm package dependencies
|
| 2 |
+
# For more information on using private registries with Edge Functions, see:
|
| 3 |
+
# https://supabase.com/docs/guides/functions/import-maps#importing-from-private-registries
|
Supabase/functions/invite-first-admin/deno.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"imports": {}
|
| 3 |
+
}
|
Supabase/functions/invite-first-admin/index.ts
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { serve } from 'https://deno.land/std@0.177.0/http/server.ts'
|
| 2 |
+
import { createClient } from 'https://esm.sh/@supabase/supabase-js@2.39.8'
|
| 3 |
+
import { corsHeaders } from '../_shared/cors.ts'
|
| 4 |
+
|
| 5 |
+
serve(async (req) => {
|
| 6 |
+
if (req.method === 'OPTIONS') { return new Response('ok', { headers: corsHeaders }) }
|
| 7 |
+
try {
|
| 8 |
+
const { adminEmail, domain } = await req.json();
|
| 9 |
+
if (!adminEmail || !domain) throw new Error("Admin email and domain are required.");
|
| 10 |
+
if (adminEmail.split('@')[1] !== domain) throw new Error("Admin email must belong to the verified domain.");
|
| 11 |
+
|
| 12 |
+
const supabaseAdmin = createClient(Deno.env.get('SUPABASE_URL')!, Deno.env.get('SUPABASE_SERVICE_ROLE_KEY')!);
|
| 13 |
+
|
| 14 |
+
const { data: orgData, error: orgError } = await supabaseAdmin.from('organizations').select('id').eq('verified_domain', domain).eq('is_verified', true).single();
|
| 15 |
+
if (orgError || !orgData) throw new Error("Cannot send invite: Organization is not verified.");
|
| 16 |
+
|
| 17 |
+
const { error: inviteError } = await supabaseAdmin.auth.admin.inviteUserByEmail(adminEmail);
|
| 18 |
+
if (inviteError) throw inviteError;
|
| 19 |
+
|
| 20 |
+
return new Response(JSON.stringify({ success: true, message: `Invitation sent to ${adminEmail}.` }), { headers: { ...corsHeaders, 'Content-Type': 'application/json' } });
|
| 21 |
+
} catch (error) {
|
| 22 |
+
return new Response(JSON.stringify({ error: error.message }), { headers: { ...corsHeaders, 'Content-Type': 'application/json' }, status: 400 });
|
| 23 |
+
}
|
| 24 |
+
})
|
Supabase/functions/otp/.npmrc
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Configuration for private npm package dependencies
|
| 2 |
+
# For more information on using private registries with Edge Functions, see:
|
| 3 |
+
# https://supabase.com/docs/guides/functions/import-maps#importing-from-private-registries
|
Supabase/functions/otp/deno.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"imports": {}
|
| 3 |
+
}
|
Supabase/functions/otp/index.ts
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { serve } from "https://deno.land/std@0.168.0/http/server.ts";
|
| 2 |
+
import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
|
| 3 |
+
|
| 4 |
+
const corsHeaders = {
|
| 5 |
+
'Access-Control-Allow-Origin': '*',
|
| 6 |
+
'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type',
|
| 7 |
+
};
|
| 8 |
+
|
| 9 |
+
serve(async (req) => {
|
| 10 |
+
if (req.method === 'OPTIONS') {
|
| 11 |
+
return new Response('ok', { headers: corsHeaders });
|
| 12 |
+
}
|
| 13 |
+
|
| 14 |
+
try {
|
| 15 |
+
// 1. Init Supabase Clients
|
| 16 |
+
const supabaseAdmin = createClient(
|
| 17 |
+
Deno.env.get('SUPABASE_URL') ?? '',
|
| 18 |
+
Deno.env.get('SUPABASE_SERVICE_ROLE_KEY') ?? ''
|
| 19 |
+
);
|
| 20 |
+
|
| 21 |
+
const authHeader = req.headers.get('Authorization')!;
|
| 22 |
+
const supabaseClient = createClient(
|
| 23 |
+
Deno.env.get('SUPABASE_URL') ?? '',
|
| 24 |
+
Deno.env.get('SUPABASE_ANON_KEY') ?? '',
|
| 25 |
+
{ global: { headers: { Authorization: authHeader } } }
|
| 26 |
+
);
|
| 27 |
+
|
| 28 |
+
// 2. Auth Check
|
| 29 |
+
const { data: { user }, error: authError } = await supabaseClient.auth.getUser();
|
| 30 |
+
if (authError || !user) throw new Error("Unauthorized");
|
| 31 |
+
|
| 32 |
+
const { action, userCode } = await req.json();
|
| 33 |
+
|
| 34 |
+
// ==========================================
|
| 35 |
+
// ACTION: SEND SMS (VIA TWILIO)
|
| 36 |
+
// ==========================================
|
| 37 |
+
if (action === 'send') {
|
| 38 |
+
const { data: profile } = await supabaseAdmin
|
| 39 |
+
.from('profiles')
|
| 40 |
+
.select('phone')
|
| 41 |
+
.eq('id', user.id)
|
| 42 |
+
.single();
|
| 43 |
+
|
| 44 |
+
if (!profile?.phone) throw new Error("No phone number found in profile.");
|
| 45 |
+
|
| 46 |
+
const phone = profile.phone;
|
| 47 |
+
const otp = Math.floor(100000 + Math.random() * 900000).toString();
|
| 48 |
+
const expiresAt = new Date(Date.now() + 5 * 60 * 1000).toISOString();
|
| 49 |
+
|
| 50 |
+
// Upsert OTP to DB
|
| 51 |
+
const { error: upsertError } = await supabaseAdmin
|
| 52 |
+
.from('otp_verifications')
|
| 53 |
+
.upsert({ phone, otp_code: otp, expires_at: expiresAt, attempts_count: 0 }, { onConflict: 'phone' });
|
| 54 |
+
|
| 55 |
+
if (upsertError) throw upsertError;
|
| 56 |
+
|
| 57 |
+
// --- TWILIO SENDING LOGIC STARTS HERE ---
|
| 58 |
+
const accountSid = Deno.env.get("TWILIO_ACCOUNT_SID");
|
| 59 |
+
const authToken = Deno.env.get("TWILIO_AUTH_TOKEN");
|
| 60 |
+
const fromNumber = Deno.env.get("TWILIO_PHONE_NUMBER");
|
| 61 |
+
|
| 62 |
+
if (!accountSid || !authToken || !fromNumber) {
|
| 63 |
+
throw new Error("Twilio secrets are missing in Supabase.");
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
// Format parameters for Twilio API
|
| 67 |
+
const params = new URLSearchParams();
|
| 68 |
+
params.append('To', phone);
|
| 69 |
+
params.append('From', fromNumber);
|
| 70 |
+
params.append('Body', `Your Verification Code is: ${otp}`);
|
| 71 |
+
|
| 72 |
+
console.log(`Sending SMS to ${phone}...`);
|
| 73 |
+
|
| 74 |
+
const twilioRes = await fetch(
|
| 75 |
+
`https://api.twilio.com/2010-04-01/Accounts/${accountSid}/Messages.json`,
|
| 76 |
+
{
|
| 77 |
+
method: "POST",
|
| 78 |
+
headers: {
|
| 79 |
+
"Authorization": `Basic ${btoa(`${accountSid}:${authToken}`)}`,
|
| 80 |
+
"Content-Type": "application/x-www-form-urlencoded",
|
| 81 |
+
},
|
| 82 |
+
body: params,
|
| 83 |
+
}
|
| 84 |
+
);
|
| 85 |
+
|
| 86 |
+
if (!twilioRes.ok) {
|
| 87 |
+
const errorText = await twilioRes.text();
|
| 88 |
+
console.error("Twilio Error:", errorText);
|
| 89 |
+
throw new Error("Failed to send SMS. Check server logs.");
|
| 90 |
+
}
|
| 91 |
+
// --- TWILIO LOGIC ENDS HERE ---
|
| 92 |
+
|
| 93 |
+
return new Response(
|
| 94 |
+
JSON.stringify({ message: "OTP sent successfully" }),
|
| 95 |
+
{ headers: { ...corsHeaders, 'Content-Type': 'application/json' }, status: 200 }
|
| 96 |
+
);
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
// ==========================================
|
| 100 |
+
// ACTION: VERIFY
|
| 101 |
+
// ==========================================
|
| 102 |
+
if (action === 'verify') {
|
| 103 |
+
if (!userCode) throw new Error("Missing OTP code");
|
| 104 |
+
|
| 105 |
+
const { data: profile } = await supabaseAdmin.from('profiles').select('phone').eq('id', user.id).single();
|
| 106 |
+
const phone = profile?.phone;
|
| 107 |
+
|
| 108 |
+
const { data: record } = await supabaseAdmin.from('otp_verifications').select('*').eq('phone', phone).single();
|
| 109 |
+
|
| 110 |
+
if (!record) throw new Error("Invalid or expired OTP.");
|
| 111 |
+
if (new Date() > new Date(record.expires_at)) throw new Error("OTP has expired.");
|
| 112 |
+
if (record.attempts_count >= 3) throw new Error("Too many attempts.");
|
| 113 |
+
|
| 114 |
+
if (record.otp_code !== userCode) {
|
| 115 |
+
await supabaseAdmin.from('otp_verifications').update({ attempts_count: record.attempts_count + 1 }).eq('phone', phone);
|
| 116 |
+
throw new Error("Incorrect OTP code.");
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
// Success
|
| 120 |
+
await supabaseAdmin.from('profiles').update({ is_phone_verified: true }).eq('id', user.id);
|
| 121 |
+
await supabaseAdmin.from('otp_verifications').delete().eq('phone', phone);
|
| 122 |
+
|
| 123 |
+
return new Response(
|
| 124 |
+
JSON.stringify({ message: "Phone verified successfully!" }),
|
| 125 |
+
{ headers: { ...corsHeaders, 'Content-Type': 'application/json' }, status: 200 }
|
| 126 |
+
);
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
return new Response(JSON.stringify({ error: "Invalid Action" }), { status: 400, headers: corsHeaders });
|
| 130 |
+
|
| 131 |
+
} catch (error) {
|
| 132 |
+
return new Response(
|
| 133 |
+
JSON.stringify({ error: error.message }),
|
| 134 |
+
{ headers: { ...corsHeaders, 'Content-Type': 'application/json' }, status: 400 }
|
| 135 |
+
);
|
| 136 |
+
}
|
| 137 |
+
});
|
Supabase/functions/send-interview-email/.npmrc
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Configuration for private npm package dependencies
|
| 2 |
+
# For more information on using private registries with Edge Functions, see:
|
| 3 |
+
# https://supabase.com/docs/guides/functions/import-maps#importing-from-private-registries
|
Supabase/functions/send-interview-email/deno.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"imports": {}
|
| 3 |
+
}
|
Supabase/functions/send-interview-email/index.ts
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// supabase/functions/send-interview-email/index.ts
|
| 2 |
+
import { serve } from "https://deno.land/std@0.168.0/http/server.ts";
|
| 3 |
+
import { Resend } from "npm:resend";
|
| 4 |
+
|
| 5 |
+
const resend = new Resend(Deno.env.get("RESEND_API_KEY"));
|
| 6 |
+
|
| 7 |
+
const corsHeaders = {
|
| 8 |
+
"Access-Control-Allow-Origin": "*",
|
| 9 |
+
"Access-Control-Allow-Headers": "authorization, x-client-info, apikey, content-type",
|
| 10 |
+
};
|
| 11 |
+
|
| 12 |
+
serve(async (req) => {
|
| 13 |
+
if (req.method === "OPTIONS") return new Response("ok", { headers: corsHeaders });
|
| 14 |
+
|
| 15 |
+
try {
|
| 16 |
+
const { candidateName, candidateEmail, date, time, meetingLink, role } = await req.json();
|
| 17 |
+
|
| 18 |
+
const { data, error } = await resend.emails.send({
|
| 19 |
+
from: "Acme HR <onboarding@resend.dev>", // Change this to your verified domain if you have one
|
| 20 |
+
to: [candidateEmail],
|
| 21 |
+
subject: `Interview Invitation: ${role}`,
|
| 22 |
+
html: `
|
| 23 |
+
<div style="font-family: sans-serif; padding: 20px;">
|
| 24 |
+
<h1>Hi ${candidateName},</h1>
|
| 25 |
+
<p>We are pleased to invite you to a <strong>Technical Interview</strong> for the <strong>${role}</strong> position.</p>
|
| 26 |
+
|
| 27 |
+
<div style="background: #f3f4f6; padding: 15px; border-radius: 8px; margin: 20px 0;">
|
| 28 |
+
<p style="margin: 5px 0;"><strong>📅 Date:</strong> ${date}</p>
|
| 29 |
+
<p style="margin: 5px 0;"><strong>⏰ Time:</strong> ${time}</p>
|
| 30 |
+
<p style="margin: 5px 0;"><strong>🔗 Link:</strong> <a href="${meetingLink}">${meetingLink}</a></p>
|
| 31 |
+
</div>
|
| 32 |
+
|
| 33 |
+
<p>Please join 5 minutes early.</p>
|
| 34 |
+
<p>Best,<br>Hiring Team</p>
|
| 35 |
+
</div>
|
| 36 |
+
`,
|
| 37 |
+
});
|
| 38 |
+
|
| 39 |
+
if (error) throw error;
|
| 40 |
+
|
| 41 |
+
return new Response(JSON.stringify(data), {
|
| 42 |
+
headers: { ...corsHeaders, "Content-Type": "application/json" },
|
| 43 |
+
status: 200,
|
| 44 |
+
});
|
| 45 |
+
} catch (error) {
|
| 46 |
+
return new Response(JSON.stringify({ error: error.message }), {
|
| 47 |
+
headers: { ...corsHeaders, "Content-Type": "application/json" },
|
| 48 |
+
status: 500,
|
| 49 |
+
});
|
| 50 |
+
}
|
| 51 |
+
});
|
Supabase/functions/verify-domain/.npmrc
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Configuration for private npm package dependencies
|
| 2 |
+
# For more information on using private registries with Edge Functions, see:
|
| 3 |
+
# https://supabase.com/docs/guides/functions/import-maps#importing-from-private-registries
|
Supabase/functions/verify-domain/deno.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"imports": {}
|
| 3 |
+
}
|
Supabase/functions/verify-domain/index.ts
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { serve } from 'https://deno.land/std@0.177.0/http/server.ts'
|
| 2 |
+
import { createClient } from 'https://esm.sh/@supabase/supabase-js@2.39.8'
|
| 3 |
+
import { corsHeaders } from '../_shared/cors.ts'
|
| 4 |
+
|
| 5 |
+
serve(async (req) => {
|
| 6 |
+
if (req.method === 'OPTIONS') { return new Response('ok', { headers: corsHeaders }) }
|
| 7 |
+
try {
|
| 8 |
+
const { domain } = await req.json();
|
| 9 |
+
if (!domain) throw new Error("Domain is required.");
|
| 10 |
+
|
| 11 |
+
const supabaseAdmin = createClient(Deno.env.get('SUPABASE_URL')!, Deno.env.get('SUPABASE_SERVICE_ROLE_KEY')!);
|
| 12 |
+
|
| 13 |
+
const { data: orgData, error: orgError } = await supabaseAdmin.from('organizations').select('verification_token, is_verified').eq('verified_domain', domain).single();
|
| 14 |
+
if (orgError) throw new Error("Could not find an organization for this domain.");
|
| 15 |
+
if (orgData.is_verified) return new Response(JSON.stringify({ success: true, message: 'Domain is already verified.' }), { headers: { ...corsHeaders, 'Content-Type': 'application/json' } });
|
| 16 |
+
|
| 17 |
+
const expectedToken = `${orgData.verification_token}`;
|
| 18 |
+
let isVerified = false;
|
| 19 |
+
const txtRecords = await Deno.resolveDns(domain, "TXT");
|
| 20 |
+
|
| 21 |
+
for (const record of txtRecords) { if (record.includes(expectedToken)) { isVerified = true; break; } }
|
| 22 |
+
|
| 23 |
+
if (isVerified) {
|
| 24 |
+
await supabaseAdmin.from('organizations').update({ is_verified: true, verification_token: null // <-- The added line }).eq('verified_domain', domain);
|
| 25 |
+
return new Response(JSON.stringify({ success: true, message: 'Domain verified!' }), { headers: { ...corsHeaders, 'Content-Type': 'application/json' } });
|
| 26 |
+
} else {
|
| 27 |
+
throw new Error("Verification failed. TXT record not found or has not propagated yet.");
|
| 28 |
+
}
|
| 29 |
+
} catch (error) {
|
| 30 |
+
return new Response(JSON.stringify({ error: error.message }), { headers: { ...corsHeaders, 'Content-Type': 'application/json' }, status: 400 });
|
| 31 |
+
}
|
| 32 |
+
})
|
backend/.env
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SECURITY: a live OpenAI key was committed here. Rotate it immediately and
# load the real value from an untracked .env or a secrets manager.
OPENAI_API_KEY=
|
| 2 |
+
# SECURITY: a live Gemini key was committed here. Rotate it and set it locally.
GEMINI_API_KEY=
|
| 3 |
+
# Supabase configuration (fill these with your project values)
|
| 4 |
+
# SUPABASE_URL: e.g. https://your-project.supabase.co
|
| 5 |
+
# SUPABASE_KEY: service role key or anon key (prefer service role for server-side ops)
|
| 6 |
+
SUPABASE_URL=https://obhychdzwbytlzwrjrbl.supabase.co
|
| 7 |
+
# SECURITY: the service-role JWT bypasses RLS entirely — a committed copy grants
# full database access. Rotate it in the Supabase dashboard and set it locally.
SUPABASE_SERVICE_ROLE_KEY=
|
| 8 |
+
# Optional: storage bucket and prefix to fetch resumes from
|
| 9 |
+
SUPABASE_BUCKET=resume
|
| 10 |
+
SUPABASE_PREFIX=""
|
| 11 |
+
# Set to 1/true/yes to enable automatic fetching from Supabase when running `run_pipeline.py`
|
| 12 |
+
USE_SUPABASE_RAW=1
|
backend/.gitignore
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Secrets
.env
.env.*
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
# Python
|
| 5 |
+
__pycache__/
|
| 6 |
+
*.pyc
|
| 7 |
+
*.pyo
|
| 8 |
+
*.pyd
|
| 9 |
+
.Python
|
| 10 |
+
env/
|
| 11 |
+
venv/
|
| 12 |
+
.venv/
|
| 13 |
+
pip-log.txt
|
| 14 |
+
pip-delete-this-directory.txt
|
| 15 |
+
.tox/
|
| 16 |
+
.coverage
|
| 17 |
+
.coverage.*
|
| 18 |
+
.cache
|
| 19 |
+
nosetests.xml
|
| 20 |
+
coverage.xml
|
| 21 |
+
*.cover
|
| 22 |
+
*.log
|
| 23 |
+
.pytest_cache/
|
| 24 |
+
|
| 25 |
+
# Data
|
| 26 |
+
data/
|
backend/add_experience_to_embeddings.sql
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
-- Add the missing 'experience' column to profile_embeddings.
-- 1024 dimensions matches the BAAI/bge-m3 model used by the backend embedder,
-- so this must stay in sync with the other vector columns.
alter table profile_embeddings
add column if not exists experience vector(1024);
|
backend/add_projects_to_profiles.sql
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
-- Add 'projects' column to profiles table to store extracted project details.
-- Stores a JSONB array of objects, e.g. [{ title, technologies_used, description }];
-- defaults to an empty array so existing rows need no backfill.
alter table profiles
add column if not exists projects jsonb default '[]'::jsonb;
|
backend/api.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# api.py
|
| 2 |
+
import os
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
# Load env BEFORE importing modules that depend on it
|
| 6 |
+
load_dotenv()
|
| 7 |
+
|
| 8 |
+
from fastapi import FastAPI, HTTPException, UploadFile, Form, File
|
| 9 |
+
from pydantic import BaseModel
|
| 10 |
+
from supabase import create_client
|
| 11 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 12 |
+
from supabase_ingest import process_resume
|
| 13 |
+
from src.extraction.job_extractor import process_single_job
|
| 14 |
+
from src.services.ats_service import analyze_ats_compatibility
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
app = FastAPI()
|
| 18 |
+
|
| 19 |
+
app.add_middleware(
|
| 20 |
+
CORSMiddleware,
|
| 21 |
+
allow_origins=["*"], # Allow all origins for dev; restrict in prod
|
| 22 |
+
allow_credentials=True,
|
| 23 |
+
allow_methods=["*"],
|
| 24 |
+
allow_headers=["*"],
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
# Setup Supabase Client
|
| 28 |
+
SUPABASE_URL = os.environ.get("SUPABASE_URL")
|
| 29 |
+
# Use Service Role Key if available to bypass RLS
|
| 30 |
+
SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
|
| 31 |
+
|
| 32 |
+
if not SUPABASE_URL or not SUPABASE_KEY:
|
| 33 |
+
raise RuntimeError("SUPABASE_URL and SUPABASE_KEY (or SUPABASE_SERVICE_ROLE_KEY) must be set in .env")
|
| 34 |
+
|
| 35 |
+
client = create_client(SUPABASE_URL, SUPABASE_KEY)
|
| 36 |
+
|
| 37 |
+
# Define the data we expect from the frontend
|
| 38 |
+
class ResumeRequest(BaseModel):
    """Payload the frontend sends to /process-resume."""

    user_id: str    # Supabase auth user id; also the storage folder name
    file_path: str # e.g., "user_123/resume.pdf"
|
| 41 |
+
|
| 42 |
+
@app.post("/process-resume")
async def process_resume_endpoint(request: ResumeRequest):
    """Run the resume extraction pipeline for one user's uploaded file.

    Delegates all work to `process_resume`; any failure becomes a 500.
    """
    print(f"🔔 Signal received: Process resume for {request.user_id}")

    try:
        extracted = process_resume(client, request.user_id, request.file_path)
    except Exception as exc:
        print(f"❌ Error: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))

    return {"status": "success", "data": extracted}
|
| 54 |
+
|
| 55 |
+
# ---------------------------------------------------------------------
|
| 56 |
+
# WEBHOOK ENDPOINT (Called by Supabase)
|
| 57 |
+
# ---------------------------------------------------------------------
|
| 58 |
+
|
| 59 |
+
from typing import Dict, Any, Optional
|
| 60 |
+
|
| 61 |
+
class StorageEventRequest(BaseModel):
    """Supabase Database Webhook payload (shared by the storage and jobs hooks)."""

    type: str                  # event kind, e.g. "INSERT" or "UPDATE"
    table: str                 # source table, e.g. "objects" or "jobs"
    record: Dict[str, Any]     # the inserted/updated row
    # NOTE(review): this field shadows pydantic's BaseModel.schema() method name —
    # confirm nothing calls .schema() on instances of this model.
    schema: str
    old_record: Optional[Dict[str, Any]] = None  # previous row on UPDATE, absent on INSERT
|
| 67 |
+
|
| 68 |
+
@app.post("/webhook/storage")
async def storage_webhook(request: StorageEventRequest):
    """
    Handles Database Webhooks from Supabase (storage.objects insert/update).

    Expects uploads to the 'resume' bucket with paths shaped "user_id/filename";
    triggers resume processing for that user. Returns an "ignored" status for
    irrelevant events so Supabase does not retry them.
    """
    print(f"🔔 Webhook received: {request.type} on {request.table}")

    # Only INSERTs or UPDATEs (overwrites) on storage.objects are relevant.
    if request.type not in ("INSERT", "UPDATE") or request.table != "objects":
        return {"status": "ignored"}

    # Object path example: "user_123/123456_resume.pdf"
    file_path = request.record.get("name")
    bucket_id = request.record.get("bucket_id")

    if bucket_id != "resume":
        print(f"⚠️ Ignoring upload to bucket: {bucket_id}")
        return {"status": "ignored", "reason": "wrong bucket"}

    # Robustness fix: the record may lack "name"; the old code relied on
    # None.split(...) raising inside a broad try/except. Guard explicitly.
    if file_path is None:
        print("❌ Webhook record missing object name; cannot extract user_id")
        return {"status": "error", "message": "invalid file path structure"}

    # User ID is the first path segment (folder structure: user_id/filename).
    user_id = file_path.split("/")[0]

    print(f"▶️ Triggering processing for {file_path}")

    try:
        process_resume(client, user_id, file_path)
        return {"status": "success"}
    except Exception as e:
        print(f"❌ Processing failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
@app.post("/webhook/jobs")
async def jobs_webhook(request: StorageEventRequest):
    """
    Handles Database Webhooks from Supabase for the 'jobs' table.

    Both INSERT and UPDATE re-run job entity extraction; we do not yet check
    whether the description actually changed.
    """
    print(f"🔔 Webhook received: {request.type} on {request.table}")

    if request.table != "jobs":
        return {"status": "ignored", "reason": "wrong table"}

    record = request.record
    job_id = record.get("id")
    description = record.get("description")
    experience_level = record.get("experience_level")

    if not job_id:
        print("❌ Webhook missing job_id")
        return {"status": "error", "message": "missing id"}

    print(f"▶️ Triggering job extraction for Job ID: {job_id}")

    try:
        # Re-use the module-level Supabase client.
        process_single_job(client, job_id, description, experience_level)
        return {"status": "success"}
    except Exception as exc:
        print(f"❌ Job processing failed: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
|
| 138 |
+
|
| 139 |
+
@app.post("/analyze-ats")
async def analyze_ats_endpoint(
    resume: UploadFile = File(...),
    job_description: str = Form(...)
):
    """
    Real-time ATS compatibility analysis of an uploaded resume against a
    job description. Nothing is persisted to the database.
    """
    print(f"🔍 Analyzing ATS compatibility for: {resume.filename}")
    try:
        analysis = await analyze_ats_compatibility(resume, job_description)
    except Exception as exc:
        print(f"❌ ATS Analysis failed: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
    return {"status": "success", "data": analysis}
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
# Run with: uvicorn api:app --reload
|
backend/create_profile_embeddings.sql
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-- Enable the pgvector extension to work with embedding vectors
create extension if not exists vector;

-- One row per profile; each column holds the embedding of one profile field.
-- 1024 dimensions matches the BAAI/bge-m3 model used by the backend embedder.
create table if not exists profile_embeddings (
    id uuid references profiles(id) on delete cascade primary key,
    headline vector(1024),
    summary vector(1024),
    skills vector(1024),
    technical_skills vector(1024),
    experience vector(1024),
    certifications vector(1024),
    languages vector(1024),
    created_at timestamp with time zone default timezone('utc'::text, now()) not null,
    updated_at timestamp with time zone default timezone('utc'::text, now()) not null
);

-- Enable Row Level Security (RLS)
alter table profile_embeddings enable row level security;

-- Create policies (Adjust based on your actual auth requirements)
-- Allow read access to everyone (or authenticated users)
create policy "Allow read access for all users"
on profile_embeddings for select
using ( true );

-- Allow update/insert only for service_role or the user who owns the profile
-- (Assuming auth.uid() matches the profile id)
-- NOTE(review): this "for all" policy has no WITH CHECK clause, so Postgres
-- reuses the USING expression for writes — confirm that is intended. Backend
-- writes use the service-role key, which bypasses RLS entirely.
create policy "Users can update their own embeddings"
on profile_embeddings for all
using ( auth.uid() = id );
|
backend/debug_payload.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"id": "81185bdc-85be-4ff2-99c7-16cf8356cb51",
|
| 3 |
+
"resume_url": "81185bdc-85be-4ff2-99c7-16cf8356cb51/resume.pdf",
|
| 4 |
+
"file_hash": "f6f9d1e0b3badc01329126aa9f249a3e26f1ba12e26d274de0323c359faa1c13",
|
| 5 |
+
"processed": true,
|
| 6 |
+
"updated_at": "now()",
|
| 7 |
+
"full_name": "med Raffi",
|
| 8 |
+
"summary": "Computer Science student proficient in Python, Java, and C with strong skills in Object-Oriented Programming. Experienced in software development and version control using Git. Adaptable team player focused on solving complex technical challenges.",
|
| 9 |
+
"phone": "+9195390771",
|
| 10 |
+
"email": "saheedmuhammedraffi@gmail.com",
|
| 11 |
+
"skills": [
|
| 12 |
+
"Communication",
|
| 13 |
+
"Teamwork",
|
| 14 |
+
"Adaptability",
|
| 15 |
+
"Analytical Thinking"
|
| 16 |
+
],
|
| 17 |
+
"technical_skills": "Python, Java, C, SQL, HTML, CSS, JavaScript, Flask, React, Pandas, Scikit-learn, NumPy, Git, VSCode, GoogleColab, Docker, TensorFlow",
|
| 18 |
+
"education": [
|
| 19 |
+
{
|
| 20 |
+
"course": "B.Tech in Computer Science and Engineering",
|
| 21 |
+
"institution": "Carmel College of Engineering and Technology, Alappuzha",
|
| 22 |
+
"year": "2022 Present"
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"course": "Higher Secondary Education",
|
| 26 |
+
"institution": "S.D.V. English Medium Higher Secondary School, Alappuzha",
|
| 27 |
+
"year": null
|
| 28 |
+
}
|
| 29 |
+
],
|
| 30 |
+
"work_experience": [
|
| 31 |
+
{
|
| 32 |
+
"role": "AI/ML Intern",
|
| 33 |
+
"company": "ICT Academy of Kerala, Trivandrum",
|
| 34 |
+
"years": "Jun 2025 - Jul 2025",
|
| 35 |
+
"description": "Underwent a 1-month internship on Artificial Intelligence and Machine Learning. Collaborated with a 5-member team to deploy a prototype ML model tested on real-world datasets."
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"role": "Webmaster",
|
| 39 |
+
"company": "IEEE Computer Society",
|
| 40 |
+
"years": "July 2025 Present",
|
| 41 |
+
"description": "Developed a responsive web portal and admin dashboard to streamline real-time event tracking and member registration."
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"role": "TEDxCCET Curation Lead",
|
| 45 |
+
"company": "Dept. of Computer Science, CCET",
|
| 46 |
+
"years": "Nov 2025 Present",
|
| 47 |
+
"description": "Manage speaker logistics, schedules, and deliverables to ensure strict adherence to event timelines. Coordinate technical requirements and stage cues between speakers and the production team."
|
| 48 |
+
}
|
| 49 |
+
],
|
| 50 |
+
"projects": [
|
| 51 |
+
{
|
| 52 |
+
"tech_stack": [
|
| 53 |
+
"React",
|
| 54 |
+
"Supabase"
|
| 55 |
+
],
|
| 56 |
+
"description": "A full-stack milk management and distribution system that automates milk collection, farmer payments, billing, and delivery tracking through a centralized platform."
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"tech_stack": [
|
| 60 |
+
"Python",
|
| 61 |
+
"TensorFlow",
|
| 62 |
+
"Flask",
|
| 63 |
+
"React"
|
| 64 |
+
],
|
| 65 |
+
"description": "Developed an LSTM-based model to forecast short-term stock prices using live data. Integrated the trained model into a Flask API with a React interface for real-time trend prediction."
|
| 66 |
+
}
|
| 67 |
+
],
|
| 68 |
+
"certifications": "AIML Internship ICT Academy of Kerala 2025, Python Foundation Certification Springboard 2025, Programming in Java NPTEL 2024"
|
| 69 |
+
}
|
backend/debug_resume.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Candidate Name: Jane Doe
|
| 2 |
+
Email: jane@example.com
|
| 3 |
+
Projects:
|
| 4 |
+
1. E-Commerce App
|
| 5 |
+
Tech Stack: React, Node.js
|
| 6 |
+
Description: A shopping site.
|
backend/requirements.txt
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ================== Core utilities ==================
|
| 2 |
+
python-dotenv>=1.0.0
|
| 3 |
+
pandas>=2.0.0
|
| 4 |
+
tqdm>=4.66.0
|
| 5 |
+
|
| 6 |
+
# ================== PDF / DOC processing ==================
|
| 7 |
+
pypdf>=3.0.0
|
| 8 |
+
pdfplumber>=0.10.0
|
| 9 |
+
python-docx>=0.8.11
|
| 10 |
+
unicodedata2>=0.7.2
|
| 11 |
+
|
| 12 |
+
# ================== NLP preprocessing ==================
|
| 13 |
+
nltk>=3.8.1
|
| 14 |
+
|
| 15 |
+
# ================== Hugging Face / ML ==================
|
| 16 |
+
transformers>=4.44.0
|
| 17 |
+
torch>=2.2.0
|
| 18 |
+
sentence-transformers>=2.2.2
|
| 19 |
+
datasets>=2.19.0
|
| 20 |
+
accelerate>=0.30.0
|
| 21 |
+
|
| 22 |
+
# ================== APIs ==================
|
| 23 |
+
openai>=1.30.0
|
| 24 |
+
supabase>=2.0.0
|
| 25 |
+
fastapi>=0.109.0
|
| 26 |
+
uvicorn>=0.27.0
|
| 27 |
+
python-multipart>=0.0.9
|
| 28 |
+
google-genai>=0.2.0
|
backend/src/__init__.py
ADDED
|
File without changes
|
backend/src/embeddings/__init__.py
ADDED
|
File without changes
|
backend/src/embeddings/debug_embedding_storage.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import sys
import os
import time  # NOTE(review): currently unused in this script

# Add 'backend' directory to path so we can import 'supabase_ingest' directly
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))

from supabase_ingest import safe_generate_and_store_embeddings, client

# Mock data for a throwaway test user. The embedding function reads the
# profile from the DB, so these values are also upserted into `profiles` below.
user_id = "test_user_debug_123"
extracted_data = {
    "headline": "Debug Engineer",
    "summary": "This is a test summary for debugging.",
    "skills": "Debug, Python", # DB stores as string
    "technical_skills": "SQL, Vector DB", # DB stores as string
    "certifications": "",
    "languages": "English" # DB stores as string
}

print(f"DEBUG: Testing embedding storage for User ID: {user_id}")

# 1. Ensure user exists in profiles first (FK constraint on profile_embeddings.id)
try:
    print("DEBUG: Ensuring profile exists...")
    # UPSERT the mock data into the profiles table so the function can fetch it
    profile_payload = {
        "id": user_id,
        "full_name": "Debug User",
        "email": "debug@example.com",
        "updated_at": "now()",
        # Add the fields we expect to be there
        "headline": extracted_data["headline"],
        "summary": extracted_data["summary"],
        "skills": extracted_data["skills"],
        "technical_skills": extracted_data["technical_skills"],
        "certifications": extracted_data["certifications"],
        "languages": extracted_data["languages"]
    }
    client.table("profiles").upsert(profile_payload).execute()
    print("DEBUG: Profile upserted.")
except Exception as e:
    print(f"❌ Failed to create test profile: {e}")
    sys.exit(1)

# 2. Run the function under test.
print("DEBUG: Running safe_generate_and_store_embeddings...")
# It fetches the profile from the DB internally, so we don't pass extracted_data.
safe_generate_and_store_embeddings(client, user_id)

# 3. Verify that a row landed in profile_embeddings.
try:
    print("DEBUG: Verifying storage...")
    resp = client.table("profile_embeddings").select("*").eq("id", user_id).execute()
    if resp.data:
        print("✅ SUCCESS: Embedding record found!")
        print(f"Data keys: {resp.data[0].keys()}")
    else:
        print("❌ FAILURE: No record found in profile_embeddings.")
except Exception as e:
    print(f"❌ Verification failed: {e}")
|
backend/src/embeddings/job_embed.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import numpy as np
|
| 3 |
+
from typing import List
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
from supabase import create_client
|
| 6 |
+
from sentence_transformers import SentenceTransformer
|
| 7 |
+
|
| 8 |
+
# Load env
|
| 9 |
+
load_dotenv()
|
| 10 |
+
|
| 11 |
+
SUPABASE_URL = os.environ.get("SUPABASE_URL")
|
| 12 |
+
SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
|
| 13 |
+
|
| 14 |
+
# Singleton model (same pattern as profile code)
|
| 15 |
+
_model = None
|
| 16 |
+
|
| 17 |
+
def get_model():
    """Return the process-wide BAAI/bge-m3 model, loading it on first use."""
    global _model
    if _model is not None:
        return _model
    print("📥 Loading BAAI/bge-m3 model for job embeddings...")
    _model = SentenceTransformer("BAAI/bge-m3")
    return _model
|
| 23 |
+
|
| 24 |
+
def get_supabase():
    """Build a Supabase client from env vars; return None when credentials are absent."""
    if SUPABASE_URL and SUPABASE_KEY:
        return create_client(SUPABASE_URL, SUPABASE_KEY)
    print("❌ Missing Supabase credentials for job embeddings.")
    return None
|
| 29 |
+
|
| 30 |
+
# -------- Embedding helpers (IDENTICAL LOGIC) --------
|
| 31 |
+
|
| 32 |
+
def generate_embedding(text: str) -> List[float]:
    """Embed one string with bge-m3; blank or missing input yields a 1024-d zero vector."""
    if not text or not text.strip():
        return [0.0] * 1024
    vector = get_model().encode(text, normalize_embeddings=True)
    return vector.tolist()
|
| 39 |
+
|
| 40 |
+
def generate_list_embedding(items: List[str]) -> List[float]:
    """Embed each item with bge-m3 and mean-pool into one 1024-d vector; empty input yields zeros."""
    if not items:
        return [0.0] * 1024
    per_item = get_model().encode(items, normalize_embeddings=True)
    return np.mean(per_item, axis=0).tolist()
|
| 48 |
+
|
| 49 |
+
# ----------------------------------------------------
|
| 50 |
+
|
| 51 |
+
def safe_generate_and_store_job_embeddings(client, job_id: str) -> None:
    """
    Fetch a job's extracted entities, embed each entity group with bge-m3,
    and upsert the vectors into the job_embeddings table.

    Failures during embedding/upsert are logged, never raised.
    """
    print(f"🧬 Generating job embeddings for Job: {job_id}")

    # 1. Pull the extracted-entities row for this job.
    resp = client.table("job_entities").select("*").eq("job_id", job_id).execute()
    if not resp.data:
        print(f"⚠️ Job entities not found for job_id={job_id}")
        return

    entities = resp.data[0]

    # 2. Normalize list-ish fields: accept a real list, a CSV string, or nothing.
    def parse_list(value):
        if isinstance(value, list):
            return value
        if isinstance(value, str):
            return [part.strip() for part in value.split(",") if part.strip()]
        return []

    skills = parse_list(entities.get("skills"))
    technical_skills = parse_list(entities.get("technical_skills"))
    tools = parse_list(entities.get("tools"))
    certifications = parse_list(entities.get("certifications"))
    experience = entities.get("experience") or ""
    education = entities.get("education") or ""

    try:
        # 3. Build the entity-wise embedding payload.
        payload = {
            "job_id": job_id,
            "skills": generate_list_embedding(skills),
            "technical_skills": generate_list_embedding(technical_skills),
            "tools": generate_list_embedding(tools),
            "experience": generate_embedding(experience),
            "education": generate_embedding(education),
            "certifications": generate_list_embedding(certifications),
            "updated_at": "now()",
        }
        # 4. Upsert so reruns overwrite instead of duplicating.
        client.table("job_embeddings").upsert(payload).execute()
        print(f"✅ Job embeddings stored for job_id={job_id}")
    except Exception as e:
        print(f"❌ Job embedding generation failed: {e}")
|
| 107 |
+
|
| 108 |
+
|
backend/src/embeddings/local_embedder.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
import numpy as np
|
| 5 |
+
from typing import List, Any
|
| 6 |
+
from dotenv import load_dotenv
|
| 7 |
+
from supabase import create_client
|
| 8 |
+
from sentence_transformers import SentenceTransformer
|
| 9 |
+
|
| 10 |
+
# Load env
|
| 11 |
+
load_dotenv()
|
| 12 |
+
|
| 13 |
+
SUPABASE_URL = os.environ.get("SUPABASE_URL")
|
| 14 |
+
SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
|
| 15 |
+
|
| 16 |
+
# Initialize Model (Globals are bad but efficient for serverless-ish/script use)
|
| 17 |
+
# Using a singleton pattern to avoid reloading model on every call if imported
|
| 18 |
+
_model = None
|
| 19 |
+
|
| 20 |
+
def get_model():
    """Return the cached bge-m3 sentence-transformer, loading it on first use."""
    global _model
    if _model is not None:
        return _model
    print("📥 Loading BAAI/bge-m3 model...")
    _model = SentenceTransformer('BAAI/bge-m3')
    return _model
|
| 26 |
+
|
| 27 |
+
def get_supabase():
    """Build a Supabase client from env vars; return None when credentials are absent."""
    if SUPABASE_URL and SUPABASE_KEY:
        return create_client(SUPABASE_URL, SUPABASE_KEY)
    print("❌ Missing Supabase credentials for embeddings.")
    return None
|
| 32 |
+
|
| 33 |
+
def generate_embedding(text: str) -> List[float]:
    """Embed one string with bge-m3 (1024-d output); blank/None input yields a zero vector."""
    if not text or not text.strip():
        return [0.0] * 1024  # BGE-M3 is 1024d
    encoded = get_model().encode(text, normalize_embeddings=True)
    return encoded.tolist()
|
| 41 |
+
|
| 42 |
+
def generate_list_embedding(items: List[str]) -> List[float]:
    """Mean-pool the bge-m3 embeddings of each item into one 1024-d vector; empty input yields zeros."""
    if not items:
        return [0.0] * 1024
    vectors = get_model().encode(items, normalize_embeddings=True)
    return np.mean(vectors, axis=0).tolist()
|
| 51 |
+
|
| 52 |
+
def safe_generate_and_store_embeddings(client, user_id: str) -> None:
    """
    Fetch a user's profile row, embed its fields with bge-m3, and upsert the
    vectors into profile_embeddings. Embedding/upsert errors are logged, not raised.
    """
    print(f"🧬 Generating embeddings for User: {user_id}")

    # 1. Fetch the profile row.
    resp = client.table("profiles").select("*").eq("id", user_id).execute()
    if not resp.data:
        print(f"⚠️ Profile not found for {user_id}")
        return
    profile = resp.data[0]

    # 2. Plain-text fields.
    summary = profile.get("summary") or ""
    headline = profile.get("headline") or ""
    role = profile.get("role") or ""

    # List-ish fields may arrive as a real list or a CSV string — handle both.
    def parse_list(value):
        if isinstance(value, list):
            return value
        if isinstance(value, str):
            return [piece.strip() for piece in value.split(",") if piece.strip()]
        return []

    skills = parse_list(profile.get("skills"))
    tech_skills = parse_list(profile.get("technical_skills"))
    certifications = parse_list(profile.get("certifications"))

    # work_experience may be JSONB: flatten each entry to "Role at Company. Description".
    experience = []
    raw_experience = profile.get("work_experience") or []
    if isinstance(raw_experience, list):
        for entry in raw_experience:
            if isinstance(entry, dict):
                experience.append(
                    f"{entry.get('role') or ''} at {entry.get('company') or ''}. {entry.get('description') or ''}"
                )
            elif isinstance(entry, str):
                experience.append(entry)

    try:
        # 3. Embed and 4. upsert — column names match create_profile_embeddings.sql.
        payload = {
            "id": user_id,
            "headline": generate_embedding(f"{role} {headline}"),
            "summary": generate_embedding(summary),
            "skills": generate_list_embedding(skills),
            "technical_skills": generate_list_embedding(tech_skills),
            "experience": generate_list_embedding(experience),
            "certifications": generate_list_embedding(certifications),
            "updated_at": "now()",
        }
        client.table("profile_embeddings").upsert(payload).execute()
        print(f"✅ Embeddings stored for {user_id}")
    except Exception as e:
        print(f"❌ Embedding generation failed: {e}")
|
| 131 |
+
|
| 132 |
+
if __name__ == "__main__":
    # Manual test hook: construct a client; no embedding run is wired up yet.
    sb = get_supabase()
    if sb:
        # Replace with a valid ID for testing if needed
        pass
|
backend/src/embeddings/process_all_profiles.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import sys
|
| 3 |
+
import os
|
| 4 |
+
import time
|
| 5 |
+
|
| 6 |
+
# Add backend to path
|
| 7 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
|
| 8 |
+
|
| 9 |
+
from supabase_ingest import client, safe_generate_and_store_embeddings
|
| 10 |
+
|
| 11 |
+
def process_all_profiles():
    """Re-generate profile embeddings for every row in the `profiles` table."""
    print("🔍 Fetching all user IDs from 'profiles' table...")

    try:
        # Only the IDs are needed here: the embedding helper re-fetches the
        # full profile row itself.
        resp = client.table("profiles").select("id").execute()

        if not resp.data:
            print("⚠️ No profiles found in database.")
            return

        rows = resp.data
        total = len(rows)
        print(f"✅ Found {total} profiles to process.")

        for position, row in enumerate(rows, start=1):
            uid = row['id']
            print(f"\n[{position}/{total}] Processing User ID: {uid}")

            # Handles: fetching the full profile, parsing CSV lists,
            # generating BGE-M3 embeddings, upserting to profile_embeddings.
            safe_generate_and_store_embeddings(client, uid)

            # Small delay to be nice to the CPU/API
            # time.sleep(0.1)

        print("\n🎉 Batch processing complete!")

    except Exception as err:
        print(f"❌ Error fetching profiles: {err}")
|
| 44 |
+
|
| 45 |
+
if __name__ == "__main__":
|
| 46 |
+
process_all_profiles()
|
backend/src/embeddings/test_embedder.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
"""Smoke test: generate embeddings for a sample profile and verify dimensions."""
import sys
import os
import numpy as np

# Add backend to path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../..')))

from backend.src.embeddings.local_embedder import generate_embeddings

sample_data = {
    "headline": "Senior Software Engineer",
    "summary": "Experienced in Python and AI.",
    "skills": ["Communication", "Leadership", "Agile"],
    "technical_skills": ["Python", "FastAPI", "React"],
    "certifications": [],  # Empty list — the embedder is expected to skip it
    "languages": ["English", "Spanish"]
}

print("Running Embedding Generation Test...")
result = generate_embeddings(sample_data)

print("\nResults:")
for field, vec in result.items():
    dims = len(vec)
    print(f"Field: {field:20} | Dimensions: {dims} | Sample: {vec[:3]}...")

    # BGE-M3 embeddings should be 1024-dimensional.
    if dims != 1024:
        print(f"❌ ERROR: Expected 1024 dimensions, got {dims}")

if "certifications" not in result:
    print("Field: certifications | Correctly skipped (empty)")

print("\nDone.")
|
backend/src/extraction/__init__.py
ADDED
|
File without changes
|
backend/src/extraction/fallback_extractor.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
def extract_fallback(text: str) -> dict:
    """
    Regex-based fallback extractor used when Gemini extraction fails.

    Pulls basic contact info (email, phone, links) and keyword-matched skills
    out of raw resume text and returns a dict shaped like the Gemini schema,
    so downstream ingestion can treat both extraction paths identically.
    """

    # 1. Email (Basic). Currently not returned (the auth email is used
    #    downstream instead), but kept available for future mapping.
    email_params = r"[\w\.-]+@[\w\.-]+\.\w+"
    email_match = re.search(email_params, text)
    email = email_match.group(0) if email_match else None

    # 2. Phone (Very Basic - catches 10-12 digit numbers)
    phone_match = re.search(r"(\+?\d{1,3}[-.\s]?)?(\(?\d{3}\)?[-.\s]?)?\d{3}[-.\s]?\d{4}", text)
    phone = phone_match.group(0) if phone_match else None

    # 3. Links (LinkedIn / GitHub / Portfolio)
    links = re.findall(r"https?://[^\s]+", text)
    linkedin = next((l for l in links if "linkedin.com" in l), None)
    github = next((l for l in links if "github.com" in l), None)
    portfolio = next((l for l in links if l not in [linkedin, github]), None)

    # 4. Keyword Matching for Skills (Static List)
    COMMON_SKILLS = [
        "Python", "Java", "JavaScript", "TypeScript", "C++", "C#", "SQL", "NoSQL",
        "React", "Angular", "Vue", "Node.js", "Django", "Flask", "FastAPI",
        "AWS", "Azure", "GCP", "Docker", "Kubernetes", "Git", "CI/CD",
        "Machine Learning", "Deep Learning", "NLP", "Pandas", "NumPy", "TensorFlow", "PyTorch"
    ]

    # BUG FIX: plain \b word boundaries can never match skills that END in a
    # non-word character ("C++", "C#"): there is no word boundary between
    # '+'/'#' and a following space or end-of-string. Explicit lookarounds
    # behave identically for alphanumeric skills but also match the
    # symbol-suffixed ones.
    found_skills = [
        skill for skill in COMMON_SKILLS
        if re.search(r"(?<!\w)" + re.escape(skill) + r"(?!\w)", text, re.IGNORECASE)
    ]

    # 5. Construct Payload (Matches Schema)
    return {
        "headline": None,
        "summary": text[:500] + "..." if len(text) > 500 else text,  # Fallback summary is just first 500 chars
        "skills": found_skills,
        "technical_skills": found_skills,  # Duplicate for safety
        "education": [],
        "work_experience": [],
        "certifications": [],
        "languages": [],
        "experience_years": None,
        # Extra fields specific to Supabase Ingest (mapped later)
        # "email": email,  # Backend doesn't use extracted email usually (uses auth), but good to have
        "phone": phone,
        "linkedin": linkedin,
        "github": github,
        "portfolio": portfolio
    }
|
backend/src/extraction/job_extractor.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
import json
|
| 4 |
+
import time
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
from typing import Any, Dict, List, Optional
|
| 7 |
+
from google import genai
|
| 8 |
+
from google.genai import types
|
| 9 |
+
|
| 10 |
+
from supabase import create_client
|
| 11 |
+
|
| 12 |
+
# ------------------ CONFIGURATION ------------------
# Local staging folders for job data; PROCESSED_DIR is created during batch
# processing, RAW_DIR is currently unused in this module.
RAW_DIR = "data/jobs/raw"
PROCESSED_DIR = "data/jobs/entities"

# ------------------ SETUP ------------------
load_dotenv()
# Prefer the service-role key (full DB access); fall back to the generic key.
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Initialize the Gemini client once at import time. `client` remains None
# when the key is missing or initialization fails; extraction functions
# check for this and disable themselves instead of crashing.
if GEMINI_API_KEY:
    try:
        client = genai.Client(api_key=GEMINI_API_KEY)
    except Exception as e:
        client = None
        print(f"⚠️ Failed to initialize Gemini client: {e}")
else:
    client = None
    print("⚠️ GEMINI_API_KEY not set; extraction will be disabled.")
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def clean_text(text: str) -> str:
    """Normalize a job description for extraction: strip HTML-like tags,
    drop non-ASCII runs, and collapse all whitespace to single spaces."""
    no_tags = re.sub(r"<.*?>", " ", text)            # remove markup tags
    ascii_only = re.sub(r"[^\x00-\x7F]+", " ", no_tags)  # replace non-ASCII runs
    collapsed = re.sub(r"\s+", " ", ascii_only)      # squeeze whitespace
    return collapsed.strip()
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def extract_job_entities_gemini(text: str) -> Dict[str, Any]:
    """
    Extract categorized entities (skills, qualifications, experience
    requirements) from a job description via Gemini.

    Returns the parsed JSON dict on success, or {} when the client is
    unavailable, retries are exhausted, or a non-retryable error occurs.
    Retries with exponential backoff on 503/"overloaded" errors.
    """
    cleaned_text = clean_text(text)

    system_prompt = """
    You are an intelligent information extractor specialized in job descriptions.
    Your task is to extract ONLY what is explicitly mentioned and categorize them into the following JSON structure.

    Output JSON Schema:
    {
    "skills": ["List of soft skills, general competencies..."],
    "technical_skills": ["List of technical skills, programming languages, tools..."],
    "qualification": ["List of educational qualifications..."],
    "work_experience": ["List of work experience requirements..."],
    "preferred_skills": ["List of preferred/nice-to-have skills..."]
    }

    Rules:
    - Extract exact text as it appears.
    - Do NOT infer or add anything not stated.
    - If no data for a category, return an empty list [].
    - Output MUST be valid JSON.
    """

    # Module-level `client` is None when GEMINI_API_KEY was missing/invalid.
    if client is None:
        print("❌ Extraction disabled (no Client).")
        return {}

    max_retries = 3
    for attempt in range(max_retries):
        try:
            response = client.models.generate_content(
                model="gemini-2.5-flash-lite",
                contents=system_prompt + "\n\nJOB DESCRIPTION:\n" + cleaned_text,
                config=types.GenerateContentConfig(
                    temperature=0.1,
                    # Requests raw JSON output (no Markdown wrapping).
                    response_mime_type="application/json"
                )
            )

            extracted_text = response.text.strip()
            # Clean potential markdown fences if present (though response_mime_type usually handles it)
            if extracted_text.startswith("```json"):
                extracted_text = extracted_text[7:]
            if extracted_text.startswith("```"):
                extracted_text = extracted_text[3:]
            if extracted_text.endswith("```"):
                extracted_text = extracted_text[:-3]

            return json.loads(extracted_text)

        except Exception as e:
            # Overload errors are retried with exponential backoff (2s, 4s,
            # 8s); any other failure — including invalid JSON — aborts and
            # returns {} so callers can skip this job.
            error_str = str(e)
            if "503" in error_str or "overloaded" in error_str.lower():
                wait_time = 2 ** (attempt + 1)
                print(f"⚠️ Model overloaded. Retrying in {wait_time}s...")
                time.sleep(wait_time)
            else:
                print(f"❌ Gemini Extraction failed: {e}")
                return {}

    # All retries exhausted on overload.
    return {}
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def upsert_job_entities(sb, job_id: str, experience_level: str, data: Dict[str, Any]) -> None:
    """
    Upserts the extracted entities into the jobs_entities table.

    Missing entity categories default to empty lists; DB errors are logged
    rather than raised so one bad row does not abort a batch.
    """
    row = {
        "job_id": job_id,
        "experience_level": experience_level,
    }
    # All entity categories share the same shape: list of strings.
    for field in ("skills", "technical_skills", "qualification",
                  "work_experience", "preferred_skills"):
        row[field] = data.get(field, [])
    row["updated_at"] = "now()"

    try:
        sb.table("jobs_entities").upsert(row).execute()
        print(f"✅ Database updated for Job ID: {job_id}")
    except Exception as exc:
        print(f"❌ DB Upsert Error for {job_id}: {exc}")
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def process_single_job(sb, job_id: str, description: str, experience_level: str = None) -> None:
    """
    Processes a single job: extracts entities via Gemini and upserts them
    into the jobs_entities table. Empty descriptions and empty extraction
    results are skipped with a warning.
    """
    if not (description and description.strip()):
        print(f"⚠️ Skipping empty description for job {job_id}")
        return

    print(f"🔍 Processing Job ID: {job_id}")

    entities = extract_job_entities_gemini(description)
    if not entities:
        print("⚠️ No entities extracted.")
        return

    upsert_job_entities(sb, job_id, experience_level, entities)
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def process_jobs_from_db() -> None:
    """
    Fetch every row from the Supabase ``jobs`` table and run entity
    extraction + upsert for each one.

    Requires SUPABASE_URL and a Supabase key in the environment. All setup
    and query failures are logged and cause an early return instead of
    raising, so this is safe to call from a scheduler.
    """
    if not SUPABASE_URL or not SUPABASE_KEY:
        print("⚠️ SUPABASE_URL or SUPABASE_KEY not set; skipping job fetch")
        return

    try:
        sb = create_client(SUPABASE_URL, SUPABASE_KEY)
    except Exception as e:
        print(f"⚠️ Failed to create Supabase client: {e}")
        return

    # Fetch jobs from 'jobs' table
    try:
        resp = sb.table("jobs").select("id, description, experience_level").execute()
    except Exception as e:
        print(f"⚠️ Supabase query failed: {e}")
        return

    data = resp.data if hasattr(resp, "data") else []
    if not data:
        print("⚠️ No job descriptions returned from Supabase.")
        return

    print(f"found {len(data)} jobs to process.")

    os.makedirs(PROCESSED_DIR, exist_ok=True)

    for row in data:
        job_id = row.get("id")
        desc = row.get("description") or ""

        # BUG FIX: `experience_level` was previously referenced as a bare
        # (undefined) name here, raising NameError on the first row. It must
        # be read from the row selected above.
        process_single_job(sb, job_id, desc, row.get("experience_level"))
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
if __name__ == '__main__':
    # CLI entry point: run the full DB -> Gemini -> DB extraction batch.
    print("🧪 Starting job entity extraction (DB -> Gemini -> DB)...\n")
    process_jobs_from_db()
    print("\n🎯 All jobs processed.")
|
backend/src/extraction/person_details_extraction_gemini.py
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.ingestion.parser import parse_file
|
| 2 |
+
import json
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
import os
|
| 6 |
+
from google import genai
|
| 7 |
+
import google.genai.types as types
|
| 8 |
+
import time
|
| 9 |
+
|
| 10 |
+
# Load env
|
| 11 |
+
load_dotenv()
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# Module-level Gemini client. NOTE(review): unlike job_extractor, this raises
# at import time if GEMINI_API_KEY is unset/invalid — confirm that is intended.
client=genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

# Project-relative path to the raw resume drop folder (backend/data/resumes/raw).
BASE_DIR = Path(__file__).resolve().parents[2]
RAW_DIR = BASE_DIR / "data" / "resumes" / "raw"
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
SYSTEM_PROMPT = """
|
| 21 |
+
You are a precise resume entity extraction engine.
|
| 22 |
+
|
| 23 |
+
TASK:
|
| 24 |
+
Extract ONLY the information explicitly present in the resume text.
|
| 25 |
+
|
| 26 |
+
OUTPUT RULES:
|
| 27 |
+
- Output MUST be valid JSON
|
| 28 |
+
- Do NOT hallucinate. If a field is missing, use null.
|
| 29 |
+
-Include empty lists for missing array fields.
|
| 30 |
+
-Include all fields in the output, even if null or empty.
|
| 31 |
+
-Include only the fields specified in the schema below.
|
| 32 |
+
- Do NOT include any explanations, notes, or extra text outside the JSON.
|
| 33 |
+
- Ensure the JSON is properly formatted and parsable.
|
| 34 |
+
- Return "work_experience" as a LIST of objects with fields: role, company, year, duration, description.
|
| 35 |
+
- Calculate "duration" (e.g. "2 years", "6 months") from dates if not explicitly stated.
|
| 36 |
+
- Return "education" as a LIST of objects with fields: course, institution, year.
|
| 37 |
+
- For "skills", "technical_skills", "certifications", and "languages", return LISTS of strings.
|
| 38 |
+
- **CRITICAL**: "languages" refers ONLY to human spoken/written languages (e.g., English, Hindi, Spanish). Programming languages (Python, Java, etc.) MUST go into "technical_skills".
|
| 39 |
+
- For single-value fields like "role", "headline", "summary" and return STRING or null.
|
| 40 |
+
-Calculate experience_years as an INTEGER representing total years of experience, or null if not derivable.
|
| 41 |
+
-only use the field names and structure defined in the schema below.
|
| 42 |
+
-strictLY follow the JSON schema provided.
|
| 43 |
+
|
| 44 |
+
JSON SCHEMA:
|
| 45 |
+
{
|
| 46 |
+
"headline": string | null,
|
| 47 |
+
"summary": string | null,
|
| 48 |
+
"skills": string[],
|
| 49 |
+
"technical_skills": string[], <-- Put Programming Languages HERE
|
| 50 |
+
"education": [
|
| 51 |
+
{
|
| 52 |
+
"course": string | null,
|
| 53 |
+
"institution": string | null,
|
| 54 |
+
"year": string | null
|
| 55 |
+
}
|
| 56 |
+
],
|
| 57 |
+
"work_experience": [
|
| 58 |
+
{
|
| 59 |
+
"role": string | null,
|
| 60 |
+
"company": string | null,
|
| 61 |
+
"years": string | null,
|
| 62 |
+
"duration": string | null,
|
| 63 |
+
"description": string | null
|
| 64 |
+
}
|
| 65 |
+
],
|
| 66 |
+
"projects": [
|
| 67 |
+
{
|
| 68 |
+
"title": "string | null",
|
| 69 |
+
"technologies_used": ["string"],
|
| 70 |
+
"description": string | null
|
| 71 |
+
}
|
| 72 |
+
],
|
| 73 |
+
"projects": [
|
| 74 |
+
{
|
| 75 |
+
"title": "string | null",
|
| 76 |
+
"technologies_used": ["string"],
|
| 77 |
+
"description": "string | null"
|
| 78 |
+
}
|
| 79 |
+
],
|
| 80 |
+
"certifications": string[],
|
| 81 |
+
"languages": string[],
|
| 82 |
+
"experience_years": integer | null
|
| 83 |
+
}
|
| 84 |
+
"current_position": string | null,
|
| 85 |
+
"""
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def extract_resume_entities_gemini(text: str) -> dict:
    """
    Extract structured resume entities from (masked) resume text via Gemini.

    Returns the parsed JSON payload as a dict, or {} on failure so callers
    can fall back to the regex extractor. Retries up to 3 times with
    exponential backoff when the model reports overload (503).
    """
    max_retries = 3

    for attempt in range(max_retries):
        try:
            # BUG FIX: the generate_content() call must sit INSIDE the try —
            # 503 "overloaded" errors are raised by the call itself, so with
            # the call outside the try the retry/backoff logic never ran.
            response = client.models.generate_content(
                model="gemini-2.5-flash-lite",
                contents=SYSTEM_PROMPT + "\n\nRESUME TEXT:\n" + text,
                config=types.GenerateContentConfig(
                    temperature=0,
                    # CRITICAL: forces Gemini to return raw JSON without
                    # Markdown formatting.
                    response_mime_type="application/json"
                )
            )

            # Clean the response just in case (removes accidental backticks).
            cleaned_text = response.text.strip()
            if cleaned_text.startswith("```json"):
                cleaned_text = cleaned_text[7:]
            if cleaned_text.startswith("```"):
                cleaned_text = cleaned_text[3:]
            if cleaned_text.endswith("```"):
                cleaned_text = cleaned_text[:-3]

            return json.loads(cleaned_text)

        except Exception as e:
            # NOTE: json.JSONDecodeError is an Exception subclass, so a bad
            # JSON body lands here too and takes the generic error path ({}
            # tells the caller to use the regex fallback). The original code
            # had two additional except clauses after this one; they were
            # unreachable and have been removed.
            error_str = str(e)
            if "503" in error_str or "overloaded" in error_str.lower():
                wait_time = 2 ** (attempt + 1)  # Exponential backoff: 2s, 4s, 8s...
                print(f"⚠️ Model overloaded. Retrying in {wait_time} seconds... (Attempt {attempt+1}/{max_retries})")
                time.sleep(wait_time)
            else:
                # If it's a different error (like Auth), fail immediately
                print(f"❌ Gemini Error: {e}")
                return {}

    # BUG FIX: all retries exhausted — return {} instead of an implicit None
    # so the declared dict return type holds and callers' `if not extracted`
    # checks stay type-consistent.
    return {}
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def process_raw_resumes():
    """Parse every supported resume file in RAW_DIR and print the entities
    extracted by Gemini. Failures are logged per-file and do not abort the run."""
    if not RAW_DIR.exists():
        raise FileNotFoundError(f"Directory not found: {RAW_DIR}")

    supported = {".pdf", ".docx", ".txt"}

    for file_path in RAW_DIR.iterdir():
        if file_path.suffix.lower() not in supported:
            continue

        print(f"\n📄 Processing: {file_path.name}")

        try:
            resume_text = parse_file(str(file_path))
            entities = extract_resume_entities_gemini(resume_text)

            print("✅ Extracted entities:")
            print(entities)

        except Exception as err:
            print(f"❌ Failed for {file_path.name}: {err}")
|
| 154 |
+
|
| 155 |
+
from src.extraction.fallback_extractor import extract_fallback
|
| 156 |
+
from src.preprocess.regex_pii import extract_contact_info_regex, mask_contact_info_regex
|
| 157 |
+
from src.preprocess.anonymizer import extract_name_and_mask
|
| 158 |
+
|
| 159 |
+
def process_single_resume(file_path: str) -> dict:
    """
    Helper function for supabase_ingest.py to process a single downloaded file.

    Pipeline: parse file -> mask contact info (regex) -> mask names (NER) ->
    extract entities from the MASKED text (Gemini, with regex fallback) ->
    merge the real PII back into the result.

    Returns the merged entity dict; on total failure, returns whatever PII
    was recovered (possibly an empty dict). Never raises.
    """
    # Kept at function scope so the except-branch can reuse the masked text
    # and PII gathered before the failure point.
    text = ""
    pii_data = {}

    try:
        # 1. Convert file path to string just in case
        path_str = str(file_path)

        # 2. Parse the text from the file (PDF/DOCX)
        raw_text = parse_file(path_str)

        # 3a. Privacy Step 1: Extract and Mask Contact Info (Regex)
        print("🔒 [1/2] Masking Phone/Email/Links...")
        pii_contact = extract_contact_info_regex(raw_text)
        masked_text_v1 = mask_contact_info_regex(raw_text)

        # 3b. Privacy Step 2: Extract and Mask Candidate Name (NER)
        print("🔒 [2/2] Masking Names (NER)...")
        ner_result = extract_name_and_mask(masked_text_v1)
        final_masked_text = ner_result["masked_text"]
        candidate_name = ner_result["candidate_name"]

        # Merge PII Data. NOTE: this aliases pii_contact — adding
        # "full_name" below also mutates pii_contact.
        pii_data = pii_contact
        pii_data["full_name"] = candidate_name

        # Store masked text for error handling usage
        text = final_masked_text
        print(f"DEBUG: Final Masked Text Length: {len(text)}")
        if len(text) < 50:
            print("⚠️ WARNING: Masked text is suspiciously short!")

        # 4. Send FINAL MASKED text to Gemini (never the raw text, so no PII
        # leaves the machine).
        print("🧠 Sending to Gemini...")
        extracted = extract_resume_entities_gemini(final_masked_text)

        # 5. Fallback if Gemini failed
        if not extracted:
            print("⚠️ Gemini returned empty. Using Regex Fallback.")
            extracted = extract_fallback(final_masked_text)

        # 6. Merge PII back into results (whether from Gemini or Fallback)
        extracted.update(pii_data)

        return extracted

    except Exception as e:
        print(f"❌ Error processing {file_path}: {e}")
        # Final Fallback attempt: only possible if masking completed before
        # the exception (text is still "" otherwise).
        if text:
            print("⚠️ Exception occurred. Using Regex Fallback on masked text.")
            fallback_data = extract_fallback(text)
            fallback_data.update(pii_data)
            return fallback_data
        return pii_data
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
if __name__ == "__main__":
    # Batch-mode entry point for local testing against the raw resume folder.
    process_raw_resumes()
|
backend/src/extraction/test_regex.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Quick manual check of phone extraction in the regex fallback extractor."""
import sys
import os

# Add backend to path so we can import
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../..')))

from backend.src.extraction.fallback_extractor import extract_fallback

# A spread of international phone formats plus a no-match control case.
test_cases = [
    "+91 9876543210",          # India
    "+919876543210",           # India No Space
    "9876543210",              # India Local
    "+1 212-555-0199",         # US
    "+44 7911 123456",         # UK Mobile
    "+971 50 1234567",         # UAE
    "+61 412 345 678",         # Australia
    "+49 151 12345678",        # Germany
    "+33 6 12 34 56 78",       # France
    "+81 90-1234-5678",        # Japan
    "Phone: +91 98765-43210",  # In text
    "Call me at 123-456-7890", # US Local in text
    "No phone number here",
]

print("Testing extract_fallback Phone Extraction:")
with open("test_output.txt", "w", encoding="utf-8") as report:
    for case in test_cases:
        extracted = extract_fallback(case)
        line = f"'{case}' -> {extracted.get('phone')}"
        print(line)
        report.write(line + "\n")
|
| 33 |
+
|
backend/src/ingestion/__init__.py
ADDED
|
File without changes
|
backend/src/ingestion/docx_reader.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from docx import Document
|
| 2 |
+
from src.preprocess.cleaner import postprocess_extracted_text
|
| 3 |
+
|
| 4 |
+
def parse_docx(path: str) -> str:
    """
    Extract text from DOCX file.

    Joins all non-empty paragraphs with newlines and runs the shared
    post-processing step so the result is ready for NER and cleaning.
    """
    document = Document(path)
    raw = "\n".join(p.text for p in document.paragraphs if p.text.strip())
    return postprocess_extracted_text(raw)
|
backend/src/ingestion/parser.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from .pdf_reader import parse_pdf
|
| 3 |
+
from .docx_reader import parse_docx
|
| 4 |
+
from src.preprocess.cleaner import postprocess_extracted_text
|
| 5 |
+
from src.preprocess.cleaner import clean_text
|
| 6 |
+
from src.preprocess.anonymizer import remove_pii
|
| 7 |
+
|
| 8 |
+
def parse_file(path: str) -> str:
    """Detect file type by extension and parse accordingly.

    Supports .pdf, .docx, and .txt; the extracted text is cleaned,
    PII-stripped, and post-processed before being returned.

    Raises:
        ValueError: for unsupported extensions.
    """
    ext = os.path.splitext(path)[1].lower()

    if ext == ".pdf":
        raw = parse_pdf(path)
    elif ext == ".docx":
        raw = parse_docx(path)
    elif ext == ".txt":
        with open(path, "r", encoding="utf-8", errors="ignore") as fh:
            raw = fh.read()
    else:
        raise ValueError(f"Unsupported file type: {ext}")

    return postprocess_extracted_text(remove_pii(clean_text(raw)))
|
backend/src/ingestion/pdf_reader.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pypdf
|
| 2 |
+
import pdfplumber
|
| 3 |
+
from src.preprocess.cleaner import postprocess_extracted_text
|
| 4 |
+
|
| 5 |
+
def parse_pdf(path: str) -> str:
    """
    Extract text from a PDF file.

    Tries pdfplumber first, falls back to pypdf.
    Returns postprocessed text.

    Raises:
        ValueError: if neither extractor produced any non-empty text.
    """
    # Accumulator shared by both extraction attempts.
    # NOTE(review): if pdfplumber raises partway through, the pages it
    # already appended are kept and pypdf's output is appended after them —
    # the result may contain duplicated pages. Confirm this is intended.
    text = ""

    # --- pdfplumber extraction ---
    try:
        with pdfplumber.open(path) as pdf:
            for page in pdf.pages:
                # extract_text() may return None for image-only pages.
                page_text = page.extract_text()
                if page_text:
                    text += page_text + "\n"
        if text.strip():
            return postprocess_extracted_text(text)
    except Exception as e:
        print(f"⚠️ pdfplumber failed for {path}: {e}")

    # --- fallback to pypdf ---
    try:
        with open(path, "rb") as f:
            reader = pypdf.PdfReader(f)
            for page in reader.pages:
                page_text = page.extract_text()
                if page_text:
                    text += page_text + "\n"
            if text.strip():
                return postprocess_extracted_text(text)
    except Exception as e:
        print(f"❌ pypdf also failed for {path}: {e}")

    raise ValueError(f"Unable to extract text from PDF: {path}")
|
backend/src/matching/__init__.py
ADDED
|
File without changes
|
backend/src/matching/similarity.py
ADDED
|
File without changes
|