Johnny committed on
Commit
8f8f414
·
1 Parent(s): 0c91845

added validations on evaluate, updated filter_resumes_by_keywords to use fuzzy matching, changed extract_keywords to use TfidfVectorizer, enforced strict keyword matching

Browse files
Files changed (3) hide show
  1. app.py +46 -24
  2. requirements.txt +3 -1
  3. utils.py +96 -25
app.py CHANGED
@@ -1,5 +1,8 @@
1
  import streamlit as st
2
- from utils import evaluate_resumes, generate_pdf_report, store_in_supabase, extract_email, score_candidate, parse_resume, summarize_resume
 
 
 
3
  from config import supabase
4
  from config import HF_API_TOKEN, HF_HEADERS, HF_MODELS
5
  import fitz # PyMuPDF
@@ -10,32 +13,51 @@ import requests
10
 
11
  def main():
12
  st.set_page_config(page_title="TalentLens.AI", layout="centered")
13
- st.markdown(
14
- "<h1 style='text-align: center;'>TalentLens.AI</h1>",
15
- unsafe_allow_html=True
16
- )
17
  st.divider()
18
- st.markdown(
19
- "<h3 style='text-align: center;'>AI-Powered Intelligent Resume Screening</h3>",
20
- unsafe_allow_html=True
21
- )
22
- uploaded_files = st.file_uploader(
23
- "Upload Resumes (PDF Only, Max 10)",
24
- accept_multiple_files=True,
25
- type=["pdf"])
26
- job_description = st.text_area("Enter Job Description")
27
-
28
  if uploaded_files and len(uploaded_files) > 10:
29
- st.error("Please upload a maximum of 10 resumes.")
30
-
31
- elif st.button("Evaluate Resumes"):
32
- shortlisted = evaluate_resumes(uploaded_files, job_description)
33
- for candidate in shortlisted:
34
- st.write(f"**{candidate['name']}**") # removed - Score: {candidate['score']}
 
 
 
 
 
 
 
 
35
 
36
- # Generate PDF Report
37
- pdf_report = generate_pdf_report(shortlisted)
38
- st.download_button("Download Shortlist Report", pdf_report, "shortlist.pdf")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  if __name__ == "__main__":
41
  main()
 
1
  import streamlit as st
2
+ from utils import (
3
+ evaluate_resumes, generate_pdf_report, store_in_supabase, extract_email,
4
+ score_candidate, parse_resume, summarize_resume, extract_keywords
5
+ )
6
  from config import supabase
7
  from config import HF_API_TOKEN, HF_HEADERS, HF_MODELS
8
  import fitz # PyMuPDF
 
13
 
14
def main():
    """Render the TalentLens.AI Streamlit UI and drive the resume-evaluation flow."""
    st.set_page_config(page_title="TalentLens.AI", layout="centered")

    st.markdown("<h1 style='text-align: center;'>TalentLens.AI</h1>", unsafe_allow_html=True)
    st.divider()
    st.markdown("<h3 style='text-align: center;'>AI-Powered Intelligent Resume Screening</h3>", unsafe_allow_html=True)

    # Limit resume uploads to 10
    uploaded_files = st.file_uploader("Upload Resumes (PDF Only, Max: 10)", accept_multiple_files=True, type=["pdf"])

    if uploaded_files and len(uploaded_files) > 10:
        st.error("⚠️ You can upload a maximum of 10 resumes at a time.")
        return

    job_description = st.text_area("Enter Job Description")

    if st.button("Evaluate Resumes"):
        # .strip() so a whitespace-only description is treated as missing too.
        if not job_description or not job_description.strip():
            st.error("⚠️ Please enter a job description.")
            return
        if not uploaded_files:
            st.error("⚠️ Please upload at least one resume.")
            return

        st.write("### 📊 Evaluating Resumes...")

        # 🔹 Extract required keywords dynamically from the job description.
        # Guard against an empty keyword list so we don't render a blank label.
        required_keywords = extract_keywords(job_description)
        if required_keywords:
            st.write(f"**Extracted Keywords:** {', '.join(required_keywords)}")
        else:
            st.warning("⚠️ No keywords could be extracted from the job description.")

        shortlisted, removed_candidates = evaluate_resumes(uploaded_files, job_description)

        if not shortlisted:
            st.warning("⚠️ No resumes matched the required keywords.")
        else:
            st.subheader("✅ Shortlisted Candidates:")
            for candidate in shortlisted:
                st.write(f"**{candidate['name']}**")

            # Generate PDF Report
            pdf_report = generate_pdf_report(shortlisted)
            st.download_button("Download Shortlist Report", pdf_report, "shortlist.pdf")

        # 🔻 Display removed candidates along with the reason they were dropped.
        if removed_candidates:
            st.subheader("❌ Resumes Removed:")
            for removed in removed_candidates:
                st.write(f"**{removed['name']}** - {removed['reason']}")
61
 
62
  if __name__ == "__main__":
63
  main()
requirements.txt CHANGED
@@ -4,4 +4,6 @@ python-dotenv
4
  supabase
5
  PyMuPDF
6
  pytest
7
- sentence-transformers
 
 
 
4
  supabase
5
  PyMuPDF
6
  pytest
7
+ sentence-transformers
8
+ spacy
9
+ fuzzywuzzy
utils.py CHANGED
@@ -4,22 +4,38 @@ import json
4
  import re
5
  from io import BytesIO
6
  import supabase
7
- from config import SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_HEADERS, supabase, HF_MODELS, query, embedding_model
 
 
 
8
  from sentence_transformers import SentenceTransformer, util
 
 
 
 
 
9
 
10
  # These functions will be called in the app.py file
11
 
12
- def evaluate_resumes(uploaded_files, job_description):
13
- """Evaluates uploaded resumes and returns shortlisted candidates."""
 
 
 
14
  candidates = []
 
 
15
  for pdf_file in uploaded_files:
16
  resume_text = parse_resume(pdf_file)
17
  score = score_candidate(resume_text, job_description)
18
  email = extract_email(resume_text)
19
-
20
- # Generate a summary of the resume
21
  summary = summarize_resume(resume_text)
22
 
 
 
 
 
 
23
  candidates.append({
24
  "name": pdf_file.name,
25
  "resume": resume_text,
@@ -28,10 +44,71 @@ def evaluate_resumes(uploaded_files, job_description):
28
  "summary": summary
29
  })
30
 
31
- # Store all details including summary in Supabase
32
- store_in_supabase(resume_text, score, pdf_file.name, email, summary)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
- return sorted(candidates, key=lambda x: x["score"], reverse=True)[:5] # Return top 5 candidates
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  def parse_resume(pdf_file):
37
  """Extracts text from a resume PDF."""
@@ -65,36 +142,30 @@ def score_candidate(resume_text, job_description):
65
  except Exception as e:
66
  print(f"Error computing similarity score: {e}")
67
  return 0 # Return 0 if scoring fails
68
-
69
- # create multiple agents for different scoring
70
- # agent_experience, agent_programming_language, agent_education, agent_soft_skills, etc etc
71
- # Scoring from 1-5
72
 
73
  def summarize_resume(resume_text):
74
  """
75
- Summarizes a resume using the Google bart model.
76
-
77
- :param resume_text: The resume text to summarize.
78
- :return: A summarized version of the resume.
79
  """
80
  payload = {"inputs": f"Summarize this resume: {resume_text}"}
81
-
82
- response = query(payload, model="bart") # Use bart for summarization
83
 
84
- if response is None:
85
  print("Error: API response is None")
86
  return "Summary could not be generated."
87
 
88
- # If the response is a list, extract the first element
89
- if isinstance(response, list) and len(response) > 0:
90
- response = response[0]
91
-
92
  try:
93
- if isinstance(response, dict) and "generated_text" in response:
94
- return response["generated_text"]
 
 
 
 
 
95
  else:
96
  print("Unexpected API response format:", response)
97
  return "Summary could not be generated."
 
98
  except (TypeError, ValueError) as e:
99
  print(f"Error parsing summary: {e}")
100
  return "Summary could not be generated."
 
4
  import re
5
  from io import BytesIO
6
  import supabase
7
+ from config import (
8
+ SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_HEADERS,
9
+ supabase, HF_MODELS, query, embedding_model
10
+ )
11
  from sentence_transformers import SentenceTransformer, util
12
+ import spacy
13
+ from collections import Counter
14
+ from sklearn.feature_extraction.text import TfidfVectorizer
15
+ import streamlit as st
16
+ from fuzzywuzzy import fuzz
17
 
18
  # These functions will be called in the app.py file
19
 
20
+ # Load spaCy NLP model
21
+ nlp = spacy.load("en_core_web_sm")
22
+
23
def evaluate_resumes(uploaded_files, job_description, min_keyword_match=2):
    """Evaluates uploaded resumes, filters by score and keywords, and returns
    a (shortlisted, removed_candidates) pair.

    :param uploaded_files: Iterable of uploaded PDF file objects.
    :param job_description: Job description text used for scoring/keywords.
    :param min_keyword_match: Minimum distinct keyword matches a resume needs.
    :return: (top-5 candidates sorted by score desc, list of {"name", "reason"} dicts).
    """
    candidates = []
    removed_candidates = []

    for pdf_file in uploaded_files:
        resume_text = parse_resume(pdf_file)
        score = score_candidate(resume_text, job_description)
        email = extract_email(resume_text)

        # If score is below 0.20, remove the candidate immediately.
        # Checked BEFORE summarization so rejected resumes don't waste an API call.
        if score < 0.20:
            removed_candidates.append({"name": pdf_file.name, "reason": "Low confidence score (< 0.20)"})
            continue  # Skip adding to candidates list

        summary = summarize_resume(resume_text)

        candidates.append({
            "name": pdf_file.name,
            "resume": resume_text,
            "score": score,
            "email": email,
            "summary": summary
        })

    # Filter resumes based on job description keywords.
    # Pass the caller's threshold through (it was previously hard-coded to 2,
    # silently ignoring the min_keyword_match parameter).
    filtered_candidates, keyword_removed = filter_resumes_by_keywords(
        candidates, job_description, min_keyword_match=min_keyword_match
    )

    # Store removed candidates with a reason
    for name in keyword_removed:
        removed_candidates.append({"name": name, "reason": "Insufficient keyword matches"})

    return sorted(filtered_candidates, key=lambda x: x["score"], reverse=True)[:5], removed_candidates
55
+
56
def extract_keywords(text, top_n=10):
    """Extracts key terms from the job description using TF-IDF and spaCy."""
    # Empty or whitespace-only input yields no keywords.
    if not text.strip():
        return []

    doc = nlp(text.lower())

    # Keep only content words: nouns, proper nouns, verbs and adjectives
    # that spaCy does not flag as stop words.
    candidate_terms = [
        token.text
        for token in doc
        if token.pos_ in {"NOUN", "PROPN", "VERB", "ADJ"} and not token.is_stop
    ]
    if not candidate_terms:
        return []

    # Rank the surviving terms by TF-IDF over uni- and bi-grams.
    vectorizer = TfidfVectorizer(stop_words="english", ngram_range=(1, 2))
    try:
        matrix = vectorizer.fit_transform([" ".join(candidate_terms)])
    except ValueError:
        # Empty vocabulary (e.g. every term was an English stop word).
        return []

    terms = vectorizer.get_feature_names_out()
    scores = matrix.toarray()[0]

    # Highest-scoring terms first; return at most top_n of them.
    ranked = sorted(zip(terms, scores), key=lambda pair: pair[1], reverse=True)
    return [term for term, _ in ranked[:top_n]]
84
+
85
def filter_resumes_by_keywords(resumes, job_description, min_keyword_match=2):
    """Filters resumes based on keyword presence and fuzzy similarity.

    :param resumes: List of candidate dicts containing at least "resume" and "name".
    :param job_description: Text the required keywords are extracted from.
    :param min_keyword_match: Minimum distinct fuzzy keyword matches required.
    :return: (filtered_resumes, removed_resume_names).
    """
    job_keywords = extract_keywords(job_description)
    filtered_resumes = []
    removed_resumes = []

    if len(job_keywords) < min_keyword_match:
        st.warning("⚠️ Job description is either too short or absent for keyword filtering.")
        return resumes, []  # Skip keyword filtering if job description lacks enough keywords

    for resume in resumes:
        resume_words = resume["resume"].lower().split()

        # Fuzzy matching (>80% similarity) for flexible keyword detection.
        # any() stops scanning a resume's words at the first hit for a keyword,
        # and the set comprehension avoids the duplicate appends of the old loop —
        # the resulting matched-keyword set is identical, just computed faster.
        matched_keywords = {
            keyword
            for keyword in job_keywords
            if any(fuzz.partial_ratio(keyword, word) > 80 for word in resume_words)
        }

        # Enforce minimum keyword matches
        if len(matched_keywords) >= min_keyword_match:
            filtered_resumes.append(resume)
        else:
            removed_resumes.append(resume["name"])

    return filtered_resumes, removed_resumes
112
 
113
  def parse_resume(pdf_file):
114
  """Extracts text from a resume PDF."""
 
142
  except Exception as e:
143
  print(f"Error computing similarity score: {e}")
144
  return 0 # Return 0 if scoring fails
 
 
 
 
145
 
146
def summarize_resume(resume_text):
    """
    Summarizes a resume using the Hugging Face BART model.

    :param resume_text: Raw resume text to summarize.
    :return: The generated summary string, or a fallback message on any failure.
    """
    payload = {"inputs": f"Summarize this resume: {resume_text}"}
    response = query(payload, model="bart")  # Call API

    if not response:
        print("Error: API response is None")
        return "Summary could not be generated."

    try:
        # Check if response is a list (sometimes HF returns a list with a dict inside)
        if isinstance(response, list) and len(response) > 0:
            response = response[0]

        # Accept either summary field name HF models use. This replaces the old
        # nested .get() whose "Summary not available" default was unreachable
        # (the membership check guaranteed one of the keys existed).
        if isinstance(response, dict):
            for key in ("generated_text", "summary_text"):
                if key in response:
                    return response[key]

        print("Unexpected API response format:", response)
        return "Summary could not be generated."

    except (TypeError, ValueError) as e:
        print(f"Error parsing summary: {e}")
        return "Summary could not be generated."