Johnny committed · 8f8f414
Parent(s): 0c91845
Added validations in evaluate_resumes, updated filter_resumes_by_keywords to use fuzzy matching, switched extract_keywords to TfidfVectorizer, and enforced strict keyword matching.
Files changed:
- app.py (+46, -24)
- requirements.txt (+3, -1)
- utils.py (+96, -25)
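
The extract_keywords rewrite named in the commit message boils down to ranking job-description terms by TF-IDF weight. A minimal self-contained sketch of that idea, using scikit-learn directly (the sample job description is invented for illustration):

# Rank unigrams/bigrams of a job description by TF-IDF weight, mirroring
# the approach the utils.py diff below takes in extract_keywords.
from sklearn.feature_extraction.text import TfidfVectorizer

job_description = "Seeking a Python developer with Django, REST API, and PostgreSQL experience."

vectorizer = TfidfVectorizer(stop_words="english", ngram_range=(1, 2))
tfidf = vectorizer.fit_transform([job_description])
terms = vectorizer.get_feature_names_out()
scores = tfidf.toarray()[0]

# Highest-weighted terms first
print(sorted(zip(terms, scores), key=lambda pair: pair[1], reverse=True)[:5])

One caveat that applies to the committed extract_keywords as well: fitted on a single document, every term gets the same IDF, so the ranking effectively reduces to term frequency.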
app.py
CHANGED
@@ -1,5 +1,8 @@
 import streamlit as st
-from utils import …
+from utils import (
+    evaluate_resumes, generate_pdf_report, store_in_supabase, extract_email,
+    score_candidate, parse_resume, summarize_resume, extract_keywords
+)
 from config import supabase
 from config import HF_API_TOKEN, HF_HEADERS, HF_MODELS
 import fitz  # PyMuPDF
@@ -10,32 +13,51 @@ import requests
 
 def main():
     st.set_page_config(page_title="TalentLens.AI", layout="centered")
-    …
-    …
-        unsafe_allow_html=True
-    )
+
+    st.markdown("<h1 style='text-align: center;'>TalentLens.AI</h1>", unsafe_allow_html=True)
     st.divider()
-    st.markdown(
-        …
-        …
-    )
-    uploaded_files = st.file_uploader(
-        "Upload Resumes (PDF Only, Max 10)",
-        accept_multiple_files=True,
-        type=["pdf"])
-    job_description = st.text_area("Enter Job Description")
-
+    st.markdown("<h3 style='text-align: center;'>AI-Powered Intelligent Resume Screening</h3>", unsafe_allow_html=True)
+
+    # Limit resume uploads to 10
+    uploaded_files = st.file_uploader("Upload Resumes (PDF Only, Max: 10)", accept_multiple_files=True, type=["pdf"])
     if uploaded_files and len(uploaded_files) > 10:
-        st.error("…
-        …
-        …
-        …
-        …
-        …
+        st.error("⚠️ You can upload a maximum of 10 resumes at a time.")
+        return
+
+    job_description = st.text_area("Enter Job Description")
+
+    if st.button("Evaluate Resumes"):
+        if not job_description:
+            st.error("⚠️ Please enter a job description.")
+            return
+        if not uploaded_files:
+            st.error("⚠️ Please upload at least one resume.")
+            return
+        #if uploaded_files and job_description:
+        st.write("### 📊 Evaluating Resumes...")
 
-    # …
-    …
-        st.…
+        # 🔹 Extract required keywords dynamically from the job description
+        required_keywords = extract_keywords(job_description)
+        st.write(f"**Extracted Keywords:** {', '.join(required_keywords)}")
+
+        shortlisted, removed_candidates = evaluate_resumes(uploaded_files, job_description)
+
+        if not shortlisted:
+            st.warning("⚠️ No resumes matched the required keywords.")
+        else:
+            st.subheader("✅ Shortlisted Candidates:")
+            for candidate in shortlisted:
+                st.write(f"**{candidate['name']}**")
+
+            # Generate PDF Report
+            pdf_report = generate_pdf_report(shortlisted)
+            st.download_button("Download Shortlist Report", pdf_report, "shortlist.pdf")
+
+        # 🔻 Display removed candidates due to missing keywords
+        if removed_candidates:
+            st.subheader("❌ Resumes Removed:")
+            for removed in removed_candidates:
+                st.write(f"**{removed['name']}** - {removed['reason']}")
 
 if __name__ == "__main__":
     main()
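One way to sanity-check the new validation guards without clicking through the UI is Streamlit's AppTest harness (shipped since Streamlit 1.28). A hedged smoke-test sketch, assuming app.py and its config imports resolve in the test environment:

# Clicking "Evaluate Resumes" with no inputs should hit the new
# job-description guard added in this commit.
from streamlit.testing.v1 import AppTest

at = AppTest.from_file("app.py").run()
at.button[0].click().run()  # the only button on the page

assert any("job description" in e.value for e in at.error)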
requirements.txt
CHANGED
@@ -4,4 +4,6 @@ python-dotenv
 supabase
 PyMuPDF
 pytest
-sentence-transformers
+sentence-transformers
+spacy
+fuzzywuzzy
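Two caveats with the new dependencies: pip install spacy does not fetch the en_core_web_sm model that utils.py loads at import time, and fuzzywuzzy falls back to a slow pure-Python matcher (with a runtime warning) unless a Levenshtein backend such as python-Levenshtein is also installed. A small bootstrap sketch for the model:

# Ensure the spaCy model that utils.py loads is present; fetch it once if not.
import spacy

try:
    spacy.load("en_core_web_sm")
except OSError:
    from spacy.cli import download
    download("en_core_web_sm")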
utils.py
CHANGED
@@ -4,22 +4,38 @@ import json
 import re
 from io import BytesIO
 import supabase
-from config import …
+from config import (
+    SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_HEADERS,
+    supabase, HF_MODELS, query, embedding_model
+)
 from sentence_transformers import SentenceTransformer, util
+import spacy
+from collections import Counter
+from sklearn.feature_extraction.text import TfidfVectorizer
+import streamlit as st
+from fuzzywuzzy import fuzz
 
 # These functions will be called in the app.py file
 
-…
-…
+# Load spaCy NLP model
+nlp = spacy.load("en_core_web_sm")
+
+def evaluate_resumes(uploaded_files, job_description, min_keyword_match=2):
+    """Evaluates uploaded resumes, filters by keywords and score, and returns shortlisted candidates."""
     candidates = []
+    removed_candidates = []
+
     for pdf_file in uploaded_files:
         resume_text = parse_resume(pdf_file)
         score = score_candidate(resume_text, job_description)
         email = extract_email(resume_text)
-
-        # Generate a summary of the resume
         summary = summarize_resume(resume_text)
 
+        # If score is below 0.20, remove the candidate immediately
+        if score < 0.20:
+            removed_candidates.append({"name": pdf_file.name, "reason": "Low confidence score (< 0.20)"})
+            continue  # Skip adding to candidates list
+
         candidates.append({
             "name": pdf_file.name,
             "resume": resume_text,
@@ -28,10 +44,71 @@ def evaluate_resumes(uploaded_files, job_description):
             "summary": summary
         })
 
-    …
-    …
-
-    …
+    # Filter resumes based on job description keywords
+    filtered_candidates, keyword_removed = filter_resumes_by_keywords(candidates, job_description, min_keyword_match=2)
+
+    # Store removed candidates with a reason
+    for name in keyword_removed:
+        removed_candidates.append({"name": name, "reason": "Insufficient keyword matches"})
+
+    return sorted(filtered_candidates, key=lambda x: x["score"], reverse=True)[:5], removed_candidates
+
+def extract_keywords(text, top_n=10):
+    """Extracts key terms from the job description using TF-IDF and spaCy."""
+    if not text.strip():  # Handle empty job descriptions
+        return []
+
+    doc = nlp(text.lower())
+
+    # Extract meaningful words (nouns, proper nouns, verbs, adjectives)
+    keywords = [token.text for token in doc if token.pos_ in {"NOUN", "PROPN", "VERB", "ADJ"} and not token.is_stop]
+
+    if not keywords:  # If no valid keywords were found, return an empty list
+        return []
+
+    # Use TF-IDF to rank keywords
+    vectorizer = TfidfVectorizer(stop_words="english", ngram_range=(1, 2))
+
+    try:
+        tfidf_matrix = vectorizer.fit_transform([" ".join(keywords)])
+        feature_array = vectorizer.get_feature_names_out()
+        tfidf_scores = tfidf_matrix.toarray()[0]
+
+        # Sort by highest TF-IDF scores
+        keyword_scores = sorted(zip(feature_array, tfidf_scores), key=lambda x: x[1], reverse=True)
+
+        return [kw for kw, score in keyword_scores[:top_n]]
+
+    except ValueError:  # Catch empty vocabulary error
+        return []
+
+def filter_resumes_by_keywords(resumes, job_description, min_keyword_match=2):
+    """Filters resumes based on keyword presence and similarity."""
+    job_keywords = extract_keywords(job_description)
+    filtered_resumes = []
+    removed_resumes = []
+
+    if len(job_keywords) < min_keyword_match:
+        st.warning("⚠️ Job description is either too short or absent for keyword filtering.")
+        return resumes, []  # Skip keyword filtering if job description lacks enough keywords
+
+    for resume in resumes:
+        resume_text = resume["resume"].lower()
+        matched_keywords = []
+
+        # Apply fuzzy matching to allow flexible keyword detection
+        for keyword in job_keywords:
+            for word in resume_text.split():
+                if fuzz.partial_ratio(keyword, word) > 80:  # 80% similarity threshold
+                    matched_keywords.append(keyword)
+
+        # Enforce minimum keyword matches
+        if len(set(matched_keywords)) >= min_keyword_match:
+            filtered_resumes.append(resume)
+        else:
+            removed_resumes.append(resume["name"])
+
+    return filtered_resumes, removed_resumes
 
 def parse_resume(pdf_file):
     """Extracts text from a resume PDF."""
@@ -65,36 +142,30 @@ def score_candidate(resume_text, job_description):
     except Exception as e:
         print(f"Error computing similarity score: {e}")
         return 0  # Return 0 if scoring fails
-
-    # create multiple agents for different scoring
-    # agent_experience, agent_programming_language, agent_education, agent_soft_skills, etc etc
-    # Scoring from 1-5
 
 def summarize_resume(resume_text):
     """
-    Summarizes a resume using the …
-    …
-    :param resume_text: The resume text to summarize.
-    :return: A summarized version of the resume.
+    Summarizes a resume using the Hugging Face BART model.
     """
     payload = {"inputs": f"Summarize this resume: {resume_text}"}
-
-    response = query(payload, model="bart")  # Use bart for summarization
+    response = query(payload, model="bart")  # Call API
 
-    if response …
+    if not response:
         print("Error: API response is None")
         return "Summary could not be generated."
 
-    # If the response is a list, extract the first element
-    if isinstance(response, list) and len(response) > 0:
-        response = response[0]
-
     try:
-        if …
-        …
+        # Check if response is a list (sometimes HF returns a list with a dict inside)
+        if isinstance(response, list) and len(response) > 0:
+            response = response[0]
+
+        # Adjust for different response formats
+        if isinstance(response, dict) and ("generated_text" in response or "summary_text" in response):
+            return response.get("generated_text", response.get("summary_text", "Summary not available"))
         else:
             print("Unexpected API response format:", response)
             return "Summary could not be generated."
+
     except (TypeError, ValueError) as e:
         print(f"Error parsing summary: {e}")
         return "Summary could not be generated."
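The 80% partial_ratio threshold does the heavy lifting in filter_resumes_by_keywords. A self-contained look at how it scores near-miss tokens (the word pairs are illustrative):

from fuzzywuzzy import fuzz

# partial_ratio scores the best-matching substring alignment, so
# morphological variants and small typos clear the > 80 cutoff while
# unrelated words stay well below it.
pairs = [
    ("python", "pythonic"),     # keyword contained in the token -> 100
    ("developer", "develops"),  # shared stem -> high 80s
    ("django", "jango"),        # dropped letter -> still a perfect window
    ("python", "java"),         # unrelated -> low
]
for keyword, word in pairs:
    print(f"{keyword!r} vs {word!r}: {fuzz.partial_ratio(keyword, word)}")

Note also that evaluate_resumes passes the literal min_keyword_match=2 to filter_resumes_by_keywords rather than forwarding its own min_keyword_match parameter, so the argument on evaluate_resumes currently has no effect.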