"""Resume screening helpers.

Parses uploaded resume PDFs, scores them against a job description,
summarizes them, stores the results in Supabase and renders a PDF report.
"""

import json
import re
import textwrap
from io import BytesIO

import fitz  # PyMuPDF for PDF processing
import requests
import supabase
from sentence_transformers import SentenceTransformer, util

# NOTE: this import rebinds the name ``supabase`` to the object exported by
# ``config`` (the one ``store_in_supabase`` calls ``.table()`` on), so it must
# stay *after* ``import supabase`` above.
from config import (
    SUPABASE_URL,
    SUPABASE_KEY,
    HF_API_TOKEN,
    HF_HEADERS,
    supabase,
    HF_MODELS,
    query,
    embedding_model,
)

# Local part may contain "+" (e.g. "john+jobs@x.com"); the domain must contain
# at least one dot and cannot end in one, so trailing sentence punctuation
# ("... at a@b.com.") is not captured.
_EMAIL_RE = re.compile(r"[\w.+-]+@[\w-]+(?:\.[\w-]+)+")

# Returned by summarize_resume whenever no usable summary is available.
_SUMMARY_FALLBACK = "Summary could not be generated."

# Characters per line for summary text in the PDF report. insert_text() does
# not wrap, so long summaries are wrapped manually; 80 is a conservative fit
# for the default font size between the 50pt left margin and the page edge.
_REPORT_WRAP_WIDTH = 80


# These functions will be called in the app.py file
def evaluate_resumes(uploaded_files, job_description, top_n=5):
    """
    Evaluates uploaded resumes and returns the shortlisted candidates.

    Every resume is parsed, scored, summarized and stored in Supabase,
    regardless of whether it ends up in the shortlist.

    :param uploaded_files: Iterable of file-like objects exposing ``.name``
        and ``.read()`` (one PDF each).
    :param job_description: The job description to score resumes against.
    :param top_n: Maximum number of candidates to return (default 5).
    :return: List of candidate dicts (keys: name, resume, score, email,
        summary), sorted by descending score, at most ``top_n`` long.
    """
    candidates = []

    for pdf_file in uploaded_files:
        resume_text = parse_resume(pdf_file)
        score = score_candidate(resume_text, job_description)
        email = extract_email(resume_text)

        # Generate a summary of the resume
        summary = summarize_resume(resume_text)

        candidates.append({
            "name": pdf_file.name,
            "resume": resume_text,
            "score": score,
            "email": email,
            "summary": summary,
        })

        # Store all details including summary in Supabase
        store_in_supabase(resume_text, score, pdf_file.name, email, summary)

    ranked = sorted(candidates, key=lambda x: x["score"], reverse=True)
    # max() guards against a negative top_n silently slicing from the end.
    return ranked[:max(top_n, 0)]


def parse_resume(pdf_file):
    """
    Extracts text from a resume PDF.

    :param pdf_file: File-like object whose ``.read()`` returns the PDF bytes.
    :return: The text of all pages, joined with newlines.
    """
    # The context manager closes the document once the text is extracted.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        return "\n".join(page.get_text("text") for page in doc)


def extract_email(resume_text):
    """
    Extracts the first email address found in the resume text.

    :param resume_text: The extracted resume text.
    :return: The matched address, or None when no address is found.
    """
    match = _EMAIL_RE.search(resume_text)
    return match.group(0) if match else None


def score_candidate(resume_text, job_description):
    """
    Scores the candidate's resume based on the job description using
    sentence-transformers.

    :param resume_text: The extracted resume text.
    :param job_description: The job description for comparison.
    :return: The cosine similarity of the two embeddings, rounded to 4
        decimal places (cosine similarity can be negative), or 0 if
        scoring fails.
    """
    try:
        # Generate embeddings
        resume_embedding = embedding_model.encode(resume_text, convert_to_tensor=True)
        job_embedding = embedding_model.encode(job_description, convert_to_tensor=True)

        # Compute cosine similarity
        score = util.pytorch_cos_sim(resume_embedding, job_embedding).item()

        return round(score, 4)
    except Exception as e:
        # Deliberate best-effort: one failed scoring must not abort the batch.
        print(f"Error computing similarity score: {e}")
        return 0


def summarize_resume(resume_text):
    """
    Summarizes a resume via ``query`` using the "gemma" model.

    :param resume_text: The resume text to summarize.
    :return: The generated summary, or a fixed fallback message when the
        response is missing, empty or in an unexpected format.
    """
    prompt = f"Summarize this resume: {resume_text}"
    response = query({"inputs": prompt}, model="gemma")  # Use gemma for summarization

    if response is None:
        print("Error: API response is None")
        return _SUMMARY_FALLBACK

    # If the response is a non-empty list, use its first element
    if isinstance(response, list) and response:
        response = response[0]

    if isinstance(response, dict) and "generated_text" in response:
        generated = response["generated_text"]
        # Text-generation endpoints may echo the prompt before the completion;
        # without this the "summary" would contain the entire resume.
        if isinstance(generated, str) and generated.startswith(prompt):
            generated = generated[len(prompt):].strip()
            return generated or _SUMMARY_FALLBACK
        return generated

    print("Unexpected API response format:", response)
    return _SUMMARY_FALLBACK


def store_in_supabase(resume_text, score, candidate_name, email, summary):
    """
    Stores resume data in the Supabase "candidates" table.

    :param resume_text: The extracted resume text.
    :param score: The candidate's score; None is stored as 0.
    :param candidate_name: The candidate's name.
    :param email: Candidate's email address.
    :param summary: A summarized version of the resume.
    :return: The response returned by the insert's ``execute()`` call.
    """
    if score is None:
        score = 0  # Ensure score is never NULL

    data = {
        "name": candidate_name,
        "resume": resume_text,
        "score": score,
        "email": email,
        "summary": summary,
    }

    response = supabase.table("candidates").insert(data).execute()
    return response


# TODO: test with 10 resumes to check which of them get shortlisted.
def generate_pdf_report(shortlisted_candidates):
    """
    Generates a PDF summary of shortlisted candidates, one page each.

    :param shortlisted_candidates: Iterable of candidate dicts with the keys
        "name", "email" and "score", and optionally "summary".
    :return: A BytesIO positioned at offset 0 containing the PDF.
    """
    pdf = BytesIO()
    doc = fitz.open()

    try:
        for candidate in shortlisted_candidates:
            page = doc.new_page()

            # Use the stored summary; fall back when it is missing or empty.
            summary = candidate.get("summary") or "No summary available"

            lines = [
                f"Candidate: {candidate['name']}",
                f"Email: {candidate['email']}",
                f"Score: {candidate['score']}",
            ]
            # insert_text() does not wrap long lines, so wrap the summary here.
            # Each source line is wrapped separately to keep paragraph breaks.
            for raw_line in f"Summary: {summary}".splitlines():
                lines.extend(textwrap.wrap(raw_line, width=_REPORT_WRAP_WIDTH) or [""])

            page.insert_text((50, 50), "\n".join(lines))

        if doc.page_count == 0:
            # A document without pages cannot be saved; emit a placeholder
            # page so an empty shortlist still yields a valid PDF.
            doc.new_page().insert_text((50, 50), "No candidates shortlisted.")

        doc.save(pdf)
    finally:
        doc.close()

    pdf.seek(0)
    return pdf