TalentLensAI / utils.py
Johnny
added config.toml, updated requirements.txt, UI update
edfcf73
raw
history blame
4.53 kB
import fitz # PyMuPDF for PDF processing
import requests
import json
import re
from io import BytesIO
import supabase
from config import SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_HEADERS, supabase, HF_MODELS, query
# These functions will be called in the main.py file
def evaluate_resumes(uploaded_files, job_description):
    """
    Processes every uploaded resume and returns the highest-scoring candidates.

    Each file is parsed to text, scored against the job description, scanned
    for an email address and summarized; the resulting record is both kept
    in memory and persisted through ``store_in_supabase``.

    :param uploaded_files: Iterable of uploaded PDF file objects (each must
        provide ``read()`` and a ``name`` attribute).
    :param job_description: The job description the resumes are scored against.
    :return: Up to five candidate dicts (keys ``name``, ``resume``, ``score``,
        ``email``, ``summary``), ordered by descending score.
    """
    shortlist_size = 5
    evaluated = []

    for upload in uploaded_files:
        text = parse_resume(upload)
        score = score_candidate(text, job_description)
        email = extract_email(text)
        summary = summarize_resume(text)

        record = {
            "name": upload.name,
            "resume": text,
            "score": score,
            "email": email,
            "summary": summary,
        }
        evaluated.append(record)

        # Persist the full record (summary included) for this resume
        store_in_supabase(text, score, upload.name, email, summary)

    # Stable in-place sort, best score first; ties keep upload order
    evaluated.sort(key=lambda entry: entry["score"], reverse=True)
    return evaluated[:shortlist_size]
def parse_resume(pdf_file):
    """
    Extracts text from a resume PDF.

    :param pdf_file: File-like object whose ``read()`` returns the PDF bytes.
    :return: The plain text of all pages, joined with newlines.
    """
    # Open from memory and let the context manager close the document once
    # the text is extracted, rather than leaving it open until garbage collection.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        return "\n".join(page.get_text("text") for page in doc)
# Local part: word chars plus ".", "+", "-" (so plus-addressing is kept).
# Domain: at least two dot-separated labels; because every "." must be
# followed by a label, sentence-ending punctuation is never captured.
_EMAIL_PATTERN = re.compile(r"[\w.+-]+@[\w-]+(?:\.[\w-]+)+")


def extract_email(resume_text):
    """
    Extracts the first email address found in resume text.

    :param resume_text: The extracted resume text (may be empty or None).
    :return: The first email address as a string, or None if there is none.
    """
    if not resume_text:
        return None  # Nothing to search (e.g. a PDF that yielded no text)
    match = _EMAIL_PATTERN.search(resume_text)
    return match.group(0) if match else None
# TODO: investigate why a score of 0 is returned even when the resume matches the job description's keywords
# score_candidate uses the Google Gemma model through the Hugging Face API
def score_candidate(resume_text, job_description):
    """
    Scores the candidate's resume based on the job description using the Hugging Face API.

    :param resume_text: The extracted resume text.
    :param job_description: The job description for comparison.
    :return: A numerical score (0 if the API call fails, the response has an
        unexpected shape, or the score cannot be parsed).
    """
    payload = {"inputs": f"Resume: {resume_text}\nJob Description: {job_description}"}
    response_gemma = query(payload, model="gemma")  # Use Google Gemma Model for scoring

    # NOTE(review): the exact return shape of query() is not visible here.
    # summarize_resume indexes its response as a list, so a list-wrapped
    # result is unwrapped instead of crashing on ``.get`` below.
    if isinstance(response_gemma, (list, tuple)):
        response_gemma = response_gemma[0] if response_gemma else None

    if response_gemma is None:
        return 0  # Return 0 if API call fails or the response is empty

    try:
        return float(response_gemma.get("score", 0))  # Ensure score is always a float
    except (AttributeError, TypeError, ValueError):
        # AttributeError: response is not dict-like; the others: unparsable score
        return 0
# summarize_resume function will use HuggingFace BART model
def summarize_resume(resume_text):
    """
    Produces a short summary of the resume via the "bart" model.

    :param resume_text: The extracted resume text.
    :return: The summary text, or a fallback message when the API call
        fails or the response does not contain a summary.
    """
    bart_reply = query({"inputs": resume_text}, model="bart")

    # No response at all means the API call itself failed
    if bart_reply is None:
        return "Summary could not be generated."

    try:
        # The summary is read from the first item of the response
        return bart_reply[0].get("summary_text", "Summary not available.")
    except (IndexError, KeyError):
        return "Summary not available."
def store_in_supabase(resume_text, score, candidate_name, email, summary):
    """
    Inserts one candidate record into the "candidates" table in Supabase.

    :param resume_text: The extracted resume text.
    :param score: The candidate's score; ``None`` is stored as 0.
    :param candidate_name: The candidate's name.
    :param email: Candidate's email address.
    :param summary: A summarized version of the resume.
    :return: The response returned by executing the insert.
    """
    record = {
        "name": candidate_name,
        "resume": resume_text,
        # The score column must never receive NULL
        "score": 0 if score is None else score,
        "email": email,
        "summary": summary,
    }
    return supabase.table("candidates").insert(record).execute()
# TODO: test with 10 resumes to verify which ones get shortlisted
def generate_pdf_report(shortlisted_candidates):
    """
    Generates a PDF summary of shortlisted candidates.

    One page is written per candidate with name, email, score and summary.
    If there are no candidates, a single placeholder page is produced so a
    valid PDF is still returned.

    :param shortlisted_candidates: Iterable of candidate dicts with the keys
        ``name``, ``email`` and ``score``, and optionally ``summary``.
    :return: A ``BytesIO`` positioned at the start of the generated PDF.
    """
    import textwrap  # stdlib; only needed by this function

    wrap_width = 80  # characters per line; keeps text inside the default page width
    pdf = BytesIO()

    # The context manager closes the in-memory document after saving
    with fitz.open() as doc:
        for candidate in shortlisted_candidates:
            page = doc.new_page()

            # Use the stored summary, or provide a fallback
            summary = candidate.get("summary", "No summary available")

            # insert_text does not wrap lines on its own, so a long summary
            # would run off the right edge of the page; wrap it beforehand.
            summary_block = textwrap.fill(f"Summary: {summary}", width=wrap_width)

            page.insert_text(
                (50, 50),
                f"Candidate: {candidate['name']}\n"
                f"Email: {candidate['email']}\n"
                f"Score: {candidate['score']}\n"
                f"{summary_block}"
            )

        if len(doc) == 0:
            # PyMuPDF refuses to save a document without pages
            doc.new_page().insert_text((50, 50), "No candidates shortlisted.")

        doc.save(pdf)

    pdf.seek(0)
    return pdf