Spaces:
Running
Running
Johnny
committed on
Commit
·
2854e2c
1
Parent(s):
949011b
added interview questions to pdf, refactor question format
Browse files
app.py
CHANGED
@@ -1,16 +1,25 @@
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
-
from utils import (
|
3 |
-
evaluate_resumes, generate_pdf_report, store_in_supabase, extract_email,
|
4 |
-
score_candidate, parse_resume, summarize_resume, extract_keywords, generate_interview_questions_from_summaries
|
5 |
-
)
|
6 |
-
from config import supabase
|
7 |
-
from config import HF_API_TOKEN, HF_HEADERS, HF_MODELS
|
8 |
import fitz # PyMuPDF
|
9 |
-
from io import BytesIO
|
10 |
-
from dotenv import load_dotenv
|
11 |
-
import os
|
12 |
import requests
|
|
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
def main():
|
15 |
st.set_page_config(page_title="TalentLens.AI", layout="centered")
|
16 |
|
@@ -18,28 +27,33 @@ def main():
|
|
18 |
st.divider()
|
19 |
st.markdown("<h3 style='text-align: center;'>AI-Powered Intelligent Resume Screening</h3>", unsafe_allow_html=True)
|
20 |
|
21 |
-
#
|
22 |
-
uploaded_files = st.file_uploader(
|
|
|
|
|
|
|
|
|
|
|
23 |
if uploaded_files and len(uploaded_files) > 10:
|
24 |
st.error("⚠️ You can upload a maximum of 10 resumes at a time.")
|
25 |
return
|
26 |
|
|
|
27 |
job_description = st.text_area("Enter Job Description")
|
28 |
|
|
|
29 |
if st.button("Evaluate Resumes"):
|
30 |
if not job_description:
|
31 |
st.error("⚠️ Please enter a job description.")
|
32 |
return
|
|
|
33 |
if not uploaded_files:
|
34 |
st.error("⚠️ Please upload at least one resume.")
|
35 |
return
|
36 |
-
|
37 |
st.write("### 📊 Evaluating Resumes...")
|
38 |
-
|
39 |
-
# 🔹 Extract required keywords dynamically from the job description
|
40 |
-
# required_keywords = extract_keywords(job_description)
|
41 |
-
# st.write(f"**Extracted Keywords:** {', '.join(required_keywords)}")
|
42 |
|
|
|
43 |
shortlisted, removed_candidates = evaluate_resumes(uploaded_files, job_description)
|
44 |
|
45 |
if not shortlisted:
|
@@ -49,21 +63,22 @@ def main():
|
|
49 |
for candidate in shortlisted:
|
50 |
st.write(f"**{candidate['name']}**")
|
51 |
|
52 |
-
# Generate PDF Report
|
53 |
-
pdf_report = generate_pdf_report(shortlisted)
|
54 |
-
st.download_button("Download Shortlist Report", pdf_report, "shortlist.pdf")
|
55 |
-
|
56 |
# Generate Interview Questions
|
57 |
questions = generate_interview_questions_from_summaries(shortlisted)
|
58 |
st.subheader("🧠 Suggested Interview Questions:")
|
59 |
for idx, q in enumerate(questions, 1):
|
60 |
-
st.markdown(f"
|
|
|
|
|
|
|
|
|
61 |
|
62 |
-
#
|
63 |
if removed_candidates:
|
64 |
st.subheader("❌ Resumes Removed:")
|
65 |
for removed in removed_candidates:
|
66 |
st.write(f"**{removed['name']}** - {removed['reason']}")
|
67 |
|
|
|
68 |
if __name__ == "__main__":
|
69 |
main()
|
|
|
1 |
+
import os
|
2 |
+
from io import BytesIO
|
3 |
+
|
4 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
import fitz # PyMuPDF
|
|
|
|
|
|
|
6 |
import requests
|
7 |
+
from dotenv import load_dotenv
|
8 |
|
9 |
+
from config import supabase, HF_API_TOKEN, HF_HEADERS, HF_MODELS
|
10 |
+
from utils import (
|
11 |
+
evaluate_resumes,
|
12 |
+
generate_pdf_report,
|
13 |
+
store_in_supabase,
|
14 |
+
extract_email,
|
15 |
+
score_candidate,
|
16 |
+
parse_resume,
|
17 |
+
summarize_resume,
|
18 |
+
extract_keywords,
|
19 |
+
generate_interview_questions_from_summaries,
|
20 |
+
)
|
21 |
+
|
22 |
+
# ------------------------- Main App Function -------------------------
|
23 |
def main():
|
24 |
st.set_page_config(page_title="TalentLens.AI", layout="centered")
|
25 |
|
|
|
27 |
st.divider()
|
28 |
st.markdown("<h3 style='text-align: center;'>AI-Powered Intelligent Resume Screening</h3>", unsafe_allow_html=True)
|
29 |
|
30 |
+
# Upload resumes (limit: 10 files)
|
31 |
+
uploaded_files = st.file_uploader(
|
32 |
+
"Upload Resumes (PDF Only, Max: 10)",
|
33 |
+
accept_multiple_files=True,
|
34 |
+
type=["pdf"]
|
35 |
+
)
|
36 |
+
|
37 |
if uploaded_files and len(uploaded_files) > 10:
|
38 |
st.error("⚠️ You can upload a maximum of 10 resumes at a time.")
|
39 |
return
|
40 |
|
41 |
+
# Input job description
|
42 |
job_description = st.text_area("Enter Job Description")
|
43 |
|
44 |
+
# Evaluation trigger
|
45 |
if st.button("Evaluate Resumes"):
|
46 |
if not job_description:
|
47 |
st.error("⚠️ Please enter a job description.")
|
48 |
return
|
49 |
+
|
50 |
if not uploaded_files:
|
51 |
st.error("⚠️ Please upload at least one resume.")
|
52 |
return
|
53 |
+
|
54 |
st.write("### 📊 Evaluating Resumes...")
|
|
|
|
|
|
|
|
|
55 |
|
56 |
+
# Resume Evaluation
|
57 |
shortlisted, removed_candidates = evaluate_resumes(uploaded_files, job_description)
|
58 |
|
59 |
if not shortlisted:
|
|
|
63 |
for candidate in shortlisted:
|
64 |
st.write(f"**{candidate['name']}**")
|
65 |
|
|
|
|
|
|
|
|
|
66 |
# Generate Interview Questions
|
67 |
questions = generate_interview_questions_from_summaries(shortlisted)
|
68 |
st.subheader("🧠 Suggested Interview Questions:")
|
69 |
for idx, q in enumerate(questions, 1):
|
70 |
+
st.markdown(f"{q}")
|
71 |
+
|
72 |
+
# Downloadable PDF Report
|
73 |
+
pdf_report = generate_pdf_report(shortlisted, questions)
|
74 |
+
st.download_button("Download Shortlist Report", pdf_report, "shortlist.pdf")
|
75 |
|
76 |
+
# Removed Candidates Info
|
77 |
if removed_candidates:
|
78 |
st.subheader("❌ Resumes Removed:")
|
79 |
for removed in removed_candidates:
|
80 |
st.write(f"**{removed['name']}** - {removed['reason']}")
|
81 |
|
82 |
+
# ------------------------- Run the App -------------------------
|
83 |
if __name__ == "__main__":
|
84 |
main()
|
utils.py
CHANGED
@@ -1,43 +1,53 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
|
|
4 |
import re
|
|
|
|
|
|
|
5 |
from io import BytesIO
|
6 |
-
import supabase
|
7 |
-
from config import (
|
8 |
-
SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_HEADERS,
|
9 |
-
supabase, HF_MODELS, query, embedding_model
|
10 |
-
)
|
11 |
-
from sentence_transformers import SentenceTransformer, util
|
12 |
-
import spacy
|
13 |
from collections import Counter
|
14 |
-
|
|
|
|
|
|
|
|
|
15 |
import streamlit as st
|
16 |
from fuzzywuzzy import fuzz
|
17 |
-
import
|
18 |
-
import
|
19 |
from huggingface_hub import InferenceClient
|
20 |
-
import os
|
21 |
|
22 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
client = InferenceClient(
|
24 |
model="google/gemma-1.1-7b-it",
|
25 |
token=HF_API_TOKEN
|
26 |
)
|
27 |
|
28 |
-
#
|
29 |
-
|
30 |
-
# Load spaCy NLP model
|
31 |
try:
|
32 |
nlp = spacy.load("en_core_web_sm")
|
33 |
except OSError:
|
34 |
subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
|
35 |
nlp = spacy.load("en_core_web_sm")
|
36 |
|
|
|
|
|
|
|
37 |
def evaluate_resumes(uploaded_files, job_description, min_keyword_match=2):
|
38 |
-
"""
|
39 |
-
candidates
|
40 |
-
|
|
|
41 |
|
42 |
for pdf_file in uploaded_files:
|
43 |
resume_text = parse_resume(pdf_file)
|
@@ -47,7 +57,7 @@ def evaluate_resumes(uploaded_files, job_description, min_keyword_match=2):
|
|
47 |
|
48 |
if score < 0.20:
|
49 |
removed_candidates.append({"name": pdf_file.name, "reason": "Low confidence score (< 0.20)"})
|
50 |
-
continue
|
51 |
|
52 |
candidates.append({
|
53 |
"name": pdf_file.name,
|
@@ -57,240 +67,194 @@ def evaluate_resumes(uploaded_files, job_description, min_keyword_match=2):
|
|
57 |
"summary": summary
|
58 |
})
|
59 |
|
60 |
-
|
61 |
-
|
|
|
62 |
|
63 |
-
# 🔹 Step 3: Log removed candidates
|
64 |
for name in keyword_removed:
|
65 |
removed_candidates.append({"name": name, "reason": "Insufficient keyword matches"})
|
66 |
|
67 |
-
|
68 |
-
|
|
|
69 |
|
70 |
-
# 🔹 Step 5: Ensure return value is always a list
|
71 |
-
if not isinstance(shortlisted_candidates, list):
|
72 |
-
print("⚠️ ERROR: shortlisted_candidates is not a list! Returning empty list.")
|
73 |
-
return [], removed_candidates
|
74 |
|
75 |
-
|
76 |
|
77 |
def extract_keywords(text, top_n=10):
|
78 |
-
"""
|
79 |
-
|
|
|
|
|
80 |
return []
|
81 |
|
82 |
doc = nlp(text.lower())
|
|
|
83 |
|
84 |
-
|
85 |
-
keywords = [token.text for token in doc if token.pos_ in {"NOUN", "PROPN", "VERB", "ADJ"} and not token.is_stop]
|
86 |
-
|
87 |
-
if not keywords: # If no valid keywords were found, return an empty list
|
88 |
return []
|
89 |
|
90 |
-
# Use TF-IDF to rank keywords
|
91 |
-
vectorizer = TfidfVectorizer(stop_words="english", ngram_range=(1, 2))
|
92 |
-
|
93 |
try:
|
94 |
-
|
95 |
-
|
96 |
-
|
|
|
|
|
97 |
|
98 |
-
|
99 |
-
keyword_scores = sorted(zip(feature_array, tfidf_scores), key=lambda x: x[1], reverse=True)
|
100 |
|
101 |
-
|
102 |
-
|
103 |
-
except ValueError: # Catch empty vocabulary error
|
104 |
return []
|
105 |
|
|
|
106 |
def filter_resumes_by_keywords(resumes, job_description, min_keyword_match=2):
|
107 |
-
"""
|
|
|
|
|
108 |
job_keywords = extract_keywords(job_description)
|
109 |
-
filtered_resumes = []
|
110 |
-
removed_resumes = []
|
111 |
-
|
112 |
if len(job_keywords) < min_keyword_match:
|
113 |
-
st.warning("⚠️ Job description
|
114 |
-
return resumes, []
|
|
|
|
|
115 |
|
116 |
for resume in resumes:
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
for keyword in job_keywords:
|
122 |
-
for word in resume_text.split():
|
123 |
-
if fuzz.partial_ratio(keyword, word) > 80: # 80% similarity threshold
|
124 |
-
matched_keywords.append(keyword)
|
125 |
-
|
126 |
-
# Enforce minimum keyword matches
|
127 |
-
if len(set(matched_keywords)) >= min_keyword_match:
|
128 |
-
filtered_resumes.append(resume)
|
129 |
-
else:
|
130 |
-
removed_resumes.append(resume["name"])
|
131 |
|
132 |
-
|
|
|
|
|
|
|
133 |
|
134 |
-
|
135 |
-
"""Extracts text from a resume PDF."""
|
136 |
-
doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
|
137 |
-
text = "\n".join([page.get_text("text") for page in doc])
|
138 |
-
return text
|
139 |
|
140 |
-
def extract_email(resume_text):
|
141 |
-
"""Extracts an email address from resume text."""
|
142 |
-
match = re.search(r"[\w\.-]+@[\w\.-]+", resume_text)
|
143 |
-
return match.group(0) if match else None
|
144 |
|
145 |
def score_candidate(resume_text, job_description):
|
146 |
"""
|
147 |
-
|
148 |
-
|
149 |
-
:param resume_text: The extracted resume text.
|
150 |
-
:param job_description: The job description for comparison.
|
151 |
-
:return: A numerical score (cosine similarity between 0 and 1).
|
152 |
"""
|
153 |
try:
|
154 |
-
|
155 |
-
|
156 |
-
|
|
|
|
|
|
|
|
|
157 |
|
158 |
-
# Compute cosine similarity
|
159 |
-
score = util.pytorch_cos_sim(resume_embedding, job_embedding).item()
|
160 |
|
161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
|
163 |
-
except Exception as e:
|
164 |
-
print(f"Error computing similarity score: {e}")
|
165 |
-
return 0 # Return 0 if scoring fails
|
166 |
|
167 |
def summarize_resume(resume_text):
|
168 |
"""
|
169 |
-
|
170 |
"""
|
171 |
payload = {"inputs": f"Summarize this resume: {resume_text}"}
|
172 |
response = query(payload, model="bart")
|
173 |
|
174 |
if not response:
|
175 |
-
print("⚠️ Error: API response is None. Returning fallback summary.")
|
176 |
return "Summary unavailable due to API issues."
|
177 |
|
178 |
try:
|
179 |
-
if isinstance(response, list)
|
180 |
-
|
181 |
-
|
182 |
-
if isinstance(response, dict):
|
183 |
-
summary = response.get("generated_text") or response.get("summary_text")
|
184 |
-
|
185 |
-
if summary:
|
186 |
-
return summary.strip()
|
187 |
-
else:
|
188 |
-
print("⚠️ Unexpected API response format:", response)
|
189 |
-
return "Summary unavailable."
|
190 |
|
|
|
191 |
except Exception as e:
|
192 |
-
print(f"
|
193 |
return "Summary unavailable."
|
194 |
|
195 |
-
|
|
|
196 |
|
197 |
def store_in_supabase(resume_text, score, candidate_name, email, summary):
|
198 |
"""
|
199 |
-
|
200 |
-
|
201 |
-
:param resume_text: The extracted resume text.
|
202 |
-
:param score: The candidate's score (must be a valid number).
|
203 |
-
:param candidate_name: The candidate's name.
|
204 |
-
:param email: Candidate's email address.
|
205 |
-
:param summary: A summarized version of the resume.
|
206 |
"""
|
207 |
-
if score is None:
|
208 |
-
score = 0 # Ensure score is never NULL
|
209 |
-
|
210 |
data = {
|
211 |
"name": candidate_name,
|
212 |
"resume": resume_text,
|
213 |
-
"score": score,
|
214 |
"email": email,
|
215 |
"summary": summary
|
216 |
}
|
217 |
|
218 |
-
|
219 |
-
return response
|
220 |
|
221 |
-
|
222 |
-
|
|
|
|
|
|
|
223 |
pdf = BytesIO()
|
224 |
doc = fitz.open()
|
225 |
|
226 |
for candidate in shortlisted_candidates:
|
227 |
page = doc.new_page()
|
228 |
-
|
229 |
-
# Use stored summary, or provide a fallback
|
230 |
-
summary = candidate.get("summary", "No summary available")
|
231 |
-
|
232 |
-
# Generate interview questions
|
233 |
-
#questions = generate_interview_questions_from_summaries(summary)
|
234 |
-
#questions_text = "\n".join([f"- {q}" for q in questions])
|
235 |
-
|
236 |
-
# Define text area properties
|
237 |
-
text_box_x = 50 # Left margin
|
238 |
-
text_box_y = 50 # Top margin
|
239 |
-
text_box_width = 500 # Max width before wrapping
|
240 |
-
text_box_height = 700 # Max height before splitting to a new page
|
241 |
-
font_size = 11 # Font size for better readability
|
242 |
-
|
243 |
-
# Format candidate details
|
244 |
-
candidate_info = (
|
245 |
f"Candidate: {candidate['name']}\n"
|
246 |
f"Email: {candidate['email']}\n"
|
247 |
f"Score: {candidate['score']}\n\n"
|
248 |
-
f"Summary:\n{summary}"
|
249 |
-
#f"Suggested Interview Questions:\n{questions_text}"
|
250 |
)
|
|
|
251 |
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
# If text overflows, split across multiple pages
|
257 |
-
while text_length == 0: # 0 means text didn't fit
|
258 |
-
page = doc.new_page() # Create new page
|
259 |
-
text_rect = fitz.Rect(text_box_x, text_box_y, text_box_x + text_box_width, text_box_y + text_box_height)
|
260 |
-
text_length = page.insert_textbox(text_rect, candidate_info, fontsize=font_size, fontname="helv", align=0)
|
261 |
|
262 |
doc.save(pdf)
|
263 |
pdf.seek(0)
|
264 |
return pdf
|
265 |
|
|
|
266 |
def generate_interview_questions_from_summaries(candidates):
|
267 |
"""
|
268 |
-
Generates
|
269 |
-
Uses the Hugging Face Gemma model to generate questions.
|
270 |
"""
|
271 |
if not isinstance(candidates, list):
|
272 |
raise TypeError("Expected a list of candidate dictionaries.")
|
273 |
|
274 |
-
summaries =
|
275 |
-
combined_summary = " ".join(summaries)
|
276 |
|
277 |
prompt = (
|
278 |
-
"Based on the following summary of this top candidate for a job role,
|
279 |
-
|
|
|
280 |
)
|
281 |
|
282 |
try:
|
283 |
response = client.chat_completion(
|
284 |
-
messages=[
|
285 |
-
{"role": "user", "content": prompt}
|
286 |
-
],
|
287 |
temperature=0.7,
|
288 |
max_tokens=500
|
289 |
)
|
290 |
-
|
291 |
-
|
292 |
-
|
|
|
|
|
|
|
293 |
except Exception as e:
|
294 |
print(f"❌ Error generating interview questions: {e}")
|
295 |
-
return ["⚠️ Error generating questions."]
|
296 |
-
|
|
|
1 |
+
# === Imports ===
|
2 |
+
|
3 |
+
# Standard Library
|
4 |
+
import os
|
5 |
import re
|
6 |
+
import json
|
7 |
+
import random
|
8 |
+
import subprocess
|
9 |
from io import BytesIO
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
from collections import Counter
|
11 |
+
|
12 |
+
# Third-Party Libraries
|
13 |
+
import fitz # PyMuPDF
|
14 |
+
import requests
|
15 |
+
import spacy
|
16 |
import streamlit as st
|
17 |
from fuzzywuzzy import fuzz
|
18 |
+
from sentence_transformers import SentenceTransformer, util
|
19 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
20 |
from huggingface_hub import InferenceClient
|
|
|
21 |
|
22 |
+
# Local Configuration
|
23 |
+
from config import (
|
24 |
+
SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_HEADERS,
|
25 |
+
supabase, HF_MODELS, query, embedding_model
|
26 |
+
)
|
27 |
+
|
28 |
+
# === Initialization ===
|
29 |
+
|
30 |
+
# Hugging Face inference client for Gemma model
|
31 |
client = InferenceClient(
    model="google/gemma-1.1-7b-it",
    token=HF_API_TOKEN
)

# Load the spaCy English model; download it on first use if it is missing.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Run the download with the interpreter that is executing this module
    # (sys.executable) rather than whatever "python" resolves to on PATH,
    # so the model is installed into the environment the app actually uses.
    import sys

    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
    nlp = spacy.load("en_core_web_sm")
|
42 |
|
43 |
+
|
44 |
+
# === Core Resume Evaluation ===
|
45 |
+
|
46 |
def evaluate_resumes(uploaded_files, job_description, min_keyword_match=2):
|
47 |
+
"""
|
48 |
+
Evaluate uploaded resumes and return shortlisted candidates with scores and summaries.
|
49 |
+
"""
|
50 |
+
candidates, removed_candidates = [], []
|
51 |
|
52 |
for pdf_file in uploaded_files:
|
53 |
resume_text = parse_resume(pdf_file)
|
|
|
57 |
|
58 |
if score < 0.20:
|
59 |
removed_candidates.append({"name": pdf_file.name, "reason": "Low confidence score (< 0.20)"})
|
60 |
+
continue
|
61 |
|
62 |
candidates.append({
|
63 |
"name": pdf_file.name,
|
|
|
67 |
"summary": summary
|
68 |
})
|
69 |
|
70 |
+
filtered_candidates, keyword_removed = filter_resumes_by_keywords(
|
71 |
+
candidates, job_description, min_keyword_match
|
72 |
+
)
|
73 |
|
|
|
74 |
for name in keyword_removed:
|
75 |
removed_candidates.append({"name": name, "reason": "Insufficient keyword matches"})
|
76 |
|
77 |
+
shortlisted = sorted(filtered_candidates, key=lambda x: x["score"], reverse=True)[:5]
|
78 |
+
|
79 |
+
return shortlisted if isinstance(shortlisted, list) else [], removed_candidates
|
80 |
|
|
|
|
|
|
|
|
|
81 |
|
82 |
+
# === Keyword & Scoring Functions ===
|
83 |
|
84 |
def extract_keywords(text, top_n=10):
    """
    Extract up to ``top_n`` keywords from ``text`` using spaCy and TF-IDF.

    The text is lower-cased and tagged with the module-level spaCy pipeline
    ``nlp``. Only tokens tagged NOUN, PROPN, VERB or ADJ that are not stop
    words are kept, and those tokens are ranked with a ``TfidfVectorizer``
    over unigrams and bigrams.

    NOTE: only a single document is fitted, so every term gets the same IDF
    weight and the ranking is effectively by normalised term frequency.

    :param text: Text to analyse (e.g. a job description). ``None``,
        non-string and blank values yield an empty list.
    :param top_n: Maximum number of keywords to return; values <= 0 yield
        an empty list.
    :return: List of keyword strings, highest score first.
    """
    # Guard against None / non-string input as well as blank strings.
    if not isinstance(text, str) or not text.strip():
        return []

    doc = nlp(text.lower())
    keywords = [
        token.text
        for token in doc
        if token.pos_ in {"NOUN", "PROPN", "VERB", "ADJ"} and not token.is_stop
    ]

    if not keywords:
        return []

    try:
        tfidf = TfidfVectorizer(stop_words="english", ngram_range=(1, 2))
        matrix = tfidf.fit_transform([" ".join(keywords)])
        scores = matrix.toarray()[0]
        features = tfidf.get_feature_names_out()
        ranked = sorted(zip(features, scores), key=lambda pair: pair[1], reverse=True)

        # Clamp so a negative top_n cannot turn into "all but the last k".
        return [kw for kw, _ in ranked[:max(top_n, 0)]]

    except ValueError:
        # Raised by the vectorizer when no usable vocabulary remains.
        return []
|
108 |
|
109 |
+
|
110 |
def filter_resumes_by_keywords(resumes, job_description, min_keyword_match=2):
    """
    Split resumes into those that mention enough job keywords and the rest.

    Keywords come from ``extract_keywords(job_description)``. A keyword
    counts as matched when ``fuzz.partial_ratio`` against at least one
    whitespace-separated, lower-cased word of the resume text exceeds 80.

    :param resumes: Candidate dicts; each is read via its ``"resume"``
        (text) and ``"name"`` keys.
    :param job_description: Text the keywords are extracted from.
    :param min_keyword_match: Minimum number of distinct matched keywords
        needed to keep a resume.
    :return: Tuple ``(kept_resumes, removed_names)``. When fewer than
        ``min_keyword_match`` keywords can be extracted, a Streamlit warning
        is shown and all resumes are returned unfiltered.
    """
    job_keywords = extract_keywords(job_description)
    if len(job_keywords) < min_keyword_match:
        st.warning("⚠️ Job description too short or missing for keyword filtering.")
        return resumes, []

    filtered, removed = [], []

    for resume in resumes:
        # Tokenise once per resume instead of once per keyword. A set drops
        # repeated words, which cannot change the any() result below.
        words = set(resume["resume"].lower().split())
        matched = {
            keyword for keyword in job_keywords
            if any(fuzz.partial_ratio(keyword, word) > 80 for word in words)
        }

        if len(matched) >= min_keyword_match:
            filtered.append(resume)
        else:
            removed.append(resume["name"])

    return filtered, removed
|
|
|
|
|
|
|
|
|
133 |
|
|
|
|
|
|
|
|
|
134 |
|
135 |
def score_candidate(resume_text, job_description):
    """
    Compute the cosine similarity between a resume and a job description.

    Both texts are encoded with the module-level ``embedding_model`` and
    compared with ``util.pytorch_cos_sim``.

    :param resume_text: Extracted resume text.
    :param job_description: Job description to compare against.
    :return: Similarity rounded to 4 decimals, or ``0`` when either text is
        empty/blank or when encoding/scoring fails.
    """
    # An empty text (e.g. an image-only PDF with no extractable text) has no
    # meaningful embedding, so score it as 0 instead of an arbitrary value.
    if not resume_text or not str(resume_text).strip():
        return 0
    if not job_description or not str(job_description).strip():
        return 0

    try:
        resume_vec = embedding_model.encode(resume_text, convert_to_tensor=True)
        job_vec = embedding_model.encode(job_description, convert_to_tensor=True)
        score = util.pytorch_cos_sim(resume_vec, job_vec).item()
        return round(score, 4)
    except Exception as e:
        # Deliberate best-effort: a scoring failure must not abort the batch.
        print(f"Error computing similarity: {e}")
        return 0
|
147 |
|
|
|
|
|
148 |
|
149 |
+
# === Text Extraction & Summarization ===
|
150 |
+
|
151 |
+
def parse_resume(pdf_file):
    """
    Extract the raw text of every page of a PDF.

    :param pdf_file: Binary file-like object whose ``read()`` returns the
        PDF bytes.
    :return: The text of all pages joined with newlines.
    """
    # Open as a context manager so the PyMuPDF document is closed (and its
    # buffers released) as soon as the text has been extracted.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        return "\n".join(page.get_text("text") for page in doc)
|
157 |
+
|
158 |
+
|
159 |
+
def extract_email(resume_text):
    """
    Extract the first e-mail address found in ``resume_text``.

    An address is a local part (word characters plus ``. % + -``), an ``@``,
    and a domain made of at least two dot-separated labels. Requiring the
    dotted domain rejects social handles such as ``user@handle`` and stops
    the match before trailing sentence punctuation (``name@site.com.``).

    :param resume_text: Text to search; ``None`` or empty text yields ``None``.
    :return: The matched address, or ``None`` when no address is present.
    """
    if not resume_text:
        return None

    match = re.search(r"[\w.%+-]+@[\w-]+(?:\.[\w-]+)+", resume_text)
    return match.group(0) if match else None
|
165 |
|
|
|
|
|
|
|
166 |
|
167 |
def summarize_resume(resume_text):
    """
    Summarize a resume through the ``query`` helper using the "bart" model.

    :param resume_text: Resume text to summarize.
    :return: The stripped summary, or a fallback message when the API
        returns nothing or the response cannot be interpreted.
    """
    api_result = query(
        {"inputs": f"Summarize this resume: {resume_text}"},
        model="bart",
    )

    # Nothing came back at all (None, empty list, empty dict, ...).
    if not api_result:
        return "Summary unavailable due to API issues."

    try:
        # A list response carries the payload in its first element.
        if isinstance(api_result, list):
            api_result = api_result[0]

        text = api_result.get("generated_text")
        if not text:
            text = api_result.get("summary_text")

        if not text:
            return "Summary unavailable."
        return text.strip()
    except Exception as exc:
        print(f"Error parsing summary: {exc}")
        return "Summary unavailable."
|
185 |
|
186 |
+
|
187 |
+
# === Data Storage & Reporting ===
|
188 |
|
189 |
def store_in_supabase(resume_text, score, candidate_name, email, summary):
    """
    Insert one candidate record into the Supabase ``candidates`` table.

    :param resume_text: Extracted resume text.
    :param score: Candidate score; any falsy value (``None``, ``0``) is
        stored as ``0``.
    :param candidate_name: Candidate name.
    :param email: Candidate e-mail address.
    :param summary: Resume summary.
    :return: Whatever ``execute()`` on the insert query returns.
    """
    stored_score = score if score else 0

    record = {}
    record["name"] = candidate_name
    record["resume"] = resume_text
    record["score"] = stored_score
    record["email"] = email
    record["summary"] = summary

    insert_query = supabase.table("candidates").insert(record)
    return insert_query.execute()
|
|
|
202 |
|
203 |
+
|
204 |
+
def generate_pdf_report(shortlisted_candidates, questions=None):
    """
    Create a PDF report of the shortlisted candidates and interview questions.

    Each candidate starts on a new page showing name, e-mail, score and
    summary. When ``questions`` is given, the questions follow on their own
    page(s). Text that does not fit on one page continues on further pages
    instead of being dropped.

    :param shortlisted_candidates: Candidate dicts with ``"name"``,
        ``"email"`` and ``"score"`` keys and an optional ``"summary"``.
    :param questions: Optional list of question strings.
    :return: ``BytesIO`` positioned at the start of the generated PDF.
    """
    pdf = BytesIO()
    doc = fitz.open()

    try:
        for candidate in shortlisted_candidates:
            info = (
                f"Candidate: {candidate['name']}\n"
                f"Email: {candidate['email']}\n"
                f"Score: {candidate['score']}\n\n"
                f"Summary:\n{candidate.get('summary', 'No summary available')}"
            )
            _add_text_pages(doc, info)

        if questions:
            q_text = "Suggested Interview Questions:\n\n" + "\n".join(questions)
            _add_text_pages(doc, q_text)

        if doc.page_count == 0:
            # PyMuPDF refuses to save a document that has no pages.
            _add_text_pages(doc, "No candidates were shortlisted.")

        doc.save(pdf)
    finally:
        # Release the PyMuPDF document even if building or saving fails.
        doc.close()

    pdf.seek(0)
    return pdf


def _add_text_pages(doc, text):
    """Write ``text`` onto new page(s) of ``doc``, splitting it when it overflows."""
    box = fitz.Rect(50, 50, 550, 750)
    pending = [text]

    while pending:
        chunk = pending.pop(0)
        page = doc.new_page()

        # insert_textbox writes nothing and returns a negative value when the
        # text does not fit the rectangle. Halve the chunk (preferring a
        # whitespace boundary) until it fits and queue the rest for the
        # following page(s), so no content is silently lost.
        while page.insert_textbox(box, chunk, fontsize=11, fontname="helv", align=0) < 0:
            middle = len(chunk) // 2
            if middle == 0:
                break  # cannot split any further; give up on this fragment
            cut = max(chunk.rfind("\n", 0, middle), chunk.rfind(" ", 0, middle))
            if cut <= 0:
                cut = middle
            tail = chunk[cut:].lstrip()
            if tail:
                pending.insert(0, tail)
            chunk = chunk[:cut]
|
229 |
|
230 |
+
|
231 |
def generate_interview_questions_from_summaries(candidates):
    """
    Generate up to 5 interview questions from the candidates' summaries.

    The non-empty ``"summary"`` values of all candidates are combined into
    one prompt and sent to the module-level Hugging Face ``client`` via
    ``chat_completion``. The reply is split into lines, list markers and
    numbering are removed, and the questions are renumbered ``Q1.`` .. ``Q5.``.

    :param candidates: List of candidate dicts; each may carry a
        ``"summary"`` string. Missing, ``None`` or blank summaries are ignored.
    :return: List of question strings. A single-element list with a warning
        message is returned when there is nothing to ask about, when the
        model returns no usable lines, or when the request fails.
    :raises TypeError: If ``candidates`` is not a list.
    """
    if not isinstance(candidates, list):
        raise TypeError("Expected a list of candidate dictionaries.")

    # `c.get("summary", "")` would still return None for an explicit None
    # value and break the join, so filter on real, non-blank strings.
    summaries = " ".join(
        summary.strip()
        for summary in (c.get("summary") for c in candidates if isinstance(c, dict))
        if isinstance(summary, str) and summary.strip()
    )

    # Without any summary text the model could only invent questions.
    if not summaries:
        return ["⚠️ No questions generated."]

    prompt = (
        "Based on the following summary of this top candidate for a job role, "
        "generate 5 thoughtful, general interview questions that would help a recruiter assess their fit:\n"
        f"{summaries}"
    )

    try:
        response = client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=500
        )
        result = response.choices[0].message.content

        questions = _clean_question_lines(result)

        return [f"Q{i+1}. {q}" for i, q in enumerate(questions[:5])] or ["⚠️ No questions generated."]
    except Exception as e:
        print(f"❌ Error generating interview questions: {e}")
        return ["⚠️ Error generating questions."]


def _clean_question_lines(raw_text):
    """Return the question lines of a model reply with list markers and bold markup removed."""
    cleaned = []
    for line in raw_text.split("\n"):
        line = line.strip()
        if not line:
            continue
        # Drop a leading bullet ("- ", "* ", "• "), optional bold markers and
        # numbering such as "1.", "2)", "Q3-" or "**4.**".
        line = re.sub(r"^(?:[-*•]\s+)?(?:\*\*)?(?:Q?\d+[\.\)\-:]?\s*)*(?:\*\*)?\s*", "", line)
        line = line.strip().strip("*").strip()
        if line:
            cleaned.append(line)

    # Prefer real questions so an intro line such as "Here are 5 questions:"
    # is not numbered as Q1; fall back to every line if none contains "?".
    with_question_mark = [line for line in cleaned if "?" in line]
    return with_question_mark or cleaned
|
|