Spaces:
Running
Running
Johnny
committed on
Commit
·
949011b
1
Parent(s):
c2bc50b
added generate questions function with gemma, hugging face client
Browse files
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import streamlit as st
|
2 |
from utils import (
|
3 |
evaluate_resumes, generate_pdf_report, store_in_supabase, extract_email,
|
4 |
-
score_candidate, parse_resume, summarize_resume, extract_keywords
|
5 |
)
|
6 |
from config import supabase
|
7 |
from config import HF_API_TOKEN, HF_HEADERS, HF_MODELS
|
@@ -52,6 +52,12 @@ def main():
|
|
52 |
# Generate PDF Report
|
53 |
pdf_report = generate_pdf_report(shortlisted)
|
54 |
st.download_button("Download Shortlist Report", pdf_report, "shortlist.pdf")
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
# 🔻 Display removed candidates due to missing keywords
|
57 |
if removed_candidates:
|
|
|
1 |
import streamlit as st
|
2 |
from utils import (
|
3 |
evaluate_resumes, generate_pdf_report, store_in_supabase, extract_email,
|
4 |
+
score_candidate, parse_resume, summarize_resume, extract_keywords, generate_interview_questions_from_summaries
|
5 |
)
|
6 |
from config import supabase
|
7 |
from config import HF_API_TOKEN, HF_HEADERS, HF_MODELS
|
|
|
52 |
# Generate PDF Report
|
53 |
pdf_report = generate_pdf_report(shortlisted)
|
54 |
st.download_button("Download Shortlist Report", pdf_report, "shortlist.pdf")
|
55 |
+
|
56 |
+
# Generate Interview Questions
|
57 |
+
questions = generate_interview_questions_from_summaries(shortlisted)
|
58 |
+
st.subheader("🧠 Suggested Interview Questions:")
|
59 |
+
for idx, q in enumerate(questions, 1):
|
60 |
+
st.markdown(f"**Q{idx}.** {q}")
|
61 |
|
62 |
# 🔻 Display removed candidates due to missing keywords
|
63 |
if removed_candidates:
|
config.py
CHANGED
@@ -19,7 +19,9 @@ supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
|
|
19 |
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
20 |
|
21 |
HF_MODELS = {
|
22 |
-
"bart": "https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn"
|
|
|
|
|
23 |
}
|
24 |
|
25 |
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
|
|
|
19 |
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
20 |
|
21 |
HF_MODELS = {
|
22 |
+
"bart": "https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn",
|
23 |
+
"gemma": "https://router.huggingface.co/nebius/v1/chat/completions"
|
24 |
+
|
25 |
}
|
26 |
|
27 |
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
|
utils.py
CHANGED
@@ -15,6 +15,15 @@ from sklearn.feature_extraction.text import TfidfVectorizer
|
|
15 |
import streamlit as st
|
16 |
from fuzzywuzzy import fuzz
|
17 |
import subprocess
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
# These functions will be called in the app.py file
|
20 |
|
@@ -28,7 +37,7 @@ except OSError:
|
|
28 |
def evaluate_resumes(uploaded_files, job_description, min_keyword_match=2):
|
29 |
"""Evaluates uploaded resumes, filters by keywords and score, and returns shortlisted candidates."""
|
30 |
candidates = []
|
31 |
-
removed_candidates = []
|
32 |
|
33 |
for pdf_file in uploaded_files:
|
34 |
resume_text = parse_resume(pdf_file)
|
@@ -36,7 +45,6 @@ def evaluate_resumes(uploaded_files, job_description, min_keyword_match=2):
|
|
36 |
email = extract_email(resume_text)
|
37 |
summary = summarize_resume(resume_text)
|
38 |
|
39 |
-
# If score is below 0.20, remove the candidate immediately
|
40 |
if score < 0.20:
|
41 |
removed_candidates.append({"name": pdf_file.name, "reason": "Low confidence score (< 0.20)"})
|
42 |
continue # Skip adding to candidates list
|
@@ -49,14 +57,22 @@ def evaluate_resumes(uploaded_files, job_description, min_keyword_match=2):
|
|
49 |
"summary": summary
|
50 |
})
|
51 |
|
52 |
-
# Filter
|
53 |
-
filtered_candidates, keyword_removed = filter_resumes_by_keywords(candidates, job_description, min_keyword_match
|
54 |
|
55 |
-
#
|
56 |
for name in keyword_removed:
|
57 |
removed_candidates.append({"name": name, "reason": "Insufficient keyword matches"})
|
58 |
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
def extract_keywords(text, top_n=10):
|
62 |
"""Extracts key terms from the job description using TF-IDF and spaCy."""
|
@@ -212,6 +228,10 @@ def generate_pdf_report(shortlisted_candidates):
|
|
212 |
|
213 |
# Use stored summary, or provide a fallback
|
214 |
summary = candidate.get("summary", "No summary available")
|
|
|
|
|
|
|
|
|
215 |
|
216 |
# Define text area properties
|
217 |
text_box_x = 50 # Left margin
|
@@ -226,6 +246,7 @@ def generate_pdf_report(shortlisted_candidates):
|
|
226 |
f"Email: {candidate['email']}\n"
|
227 |
f"Score: {candidate['score']}\n\n"
|
228 |
f"Summary:\n{summary}"
|
|
|
229 |
)
|
230 |
|
231 |
# Check if the text fits in the allowed area
|
@@ -240,4 +261,36 @@ def generate_pdf_report(shortlisted_candidates):
|
|
240 |
|
241 |
doc.save(pdf)
|
242 |
pdf.seek(0)
|
243 |
-
return pdf
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
import streamlit as st
|
16 |
from fuzzywuzzy import fuzz
|
17 |
import subprocess
|
18 |
+
import random
|
19 |
+
from huggingface_hub import InferenceClient
|
20 |
+
import os
|
21 |
+
|
22 |
+
# Initialize the client
|
23 |
+
client = InferenceClient(
|
24 |
+
model="google/gemma-1.1-7b-it",
|
25 |
+
token=HF_API_TOKEN
|
26 |
+
)
|
27 |
|
28 |
# These functions will be called in the app.py file
|
29 |
|
|
|
37 |
def evaluate_resumes(uploaded_files, job_description, min_keyword_match=2):
|
38 |
"""Evaluates uploaded resumes, filters by keywords and score, and returns shortlisted candidates."""
|
39 |
candidates = []
|
40 |
+
removed_candidates = []
|
41 |
|
42 |
for pdf_file in uploaded_files:
|
43 |
resume_text = parse_resume(pdf_file)
|
|
|
45 |
email = extract_email(resume_text)
|
46 |
summary = summarize_resume(resume_text)
|
47 |
|
|
|
48 |
if score < 0.20:
|
49 |
removed_candidates.append({"name": pdf_file.name, "reason": "Low confidence score (< 0.20)"})
|
50 |
continue # Skip adding to candidates list
|
|
|
57 |
"summary": summary
|
58 |
})
|
59 |
|
60 |
+
# 🔹 Step 2: Filter candidates based on keyword matches
|
61 |
+
filtered_candidates, keyword_removed = filter_resumes_by_keywords(candidates, job_description, min_keyword_match)
|
62 |
|
63 |
+
# 🔹 Step 3: Log removed candidates
|
64 |
for name in keyword_removed:
|
65 |
removed_candidates.append({"name": name, "reason": "Insufficient keyword matches"})
|
66 |
|
67 |
+
# 🔹 Step 4: Ensure the final list is sorted by score and limit to top 5 candidates
|
68 |
+
shortlisted_candidates = sorted(filtered_candidates, key=lambda x: x["score"], reverse=True)[:5]
|
69 |
+
|
70 |
+
# 🔹 Step 5: Ensure return value is always a list
|
71 |
+
if not isinstance(shortlisted_candidates, list):
|
72 |
+
print("⚠️ ERROR: shortlisted_candidates is not a list! Returning empty list.")
|
73 |
+
return [], removed_candidates
|
74 |
+
|
75 |
+
return shortlisted_candidates, removed_candidates
|
76 |
|
77 |
def extract_keywords(text, top_n=10):
|
78 |
"""Extracts key terms from the job description using TF-IDF and spaCy."""
|
|
|
228 |
|
229 |
# Use stored summary, or provide a fallback
|
230 |
summary = candidate.get("summary", "No summary available")
|
231 |
+
|
232 |
+
# Generate interview questions
|
233 |
+
#questions = generate_interview_questions_from_summaries(summary)
|
234 |
+
#questions_text = "\n".join([f"- {q}" for q in questions])
|
235 |
|
236 |
# Define text area properties
|
237 |
text_box_x = 50 # Left margin
|
|
|
246 |
f"Email: {candidate['email']}\n"
|
247 |
f"Score: {candidate['score']}\n\n"
|
248 |
f"Summary:\n{summary}"
|
249 |
+
#f"Suggested Interview Questions:\n{questions_text}"
|
250 |
)
|
251 |
|
252 |
# Check if the text fits in the allowed area
|
|
|
261 |
|
262 |
doc.save(pdf)
|
263 |
pdf.seek(0)
|
264 |
+
return pdf
|
265 |
+
|
266 |
+
def generate_interview_questions_from_summaries(candidates):
|
267 |
+
"""
|
268 |
+
Generates common interview questions based on the combined summaries of shortlisted candidates.
|
269 |
+
Uses the Hugging Face Gemma model to generate questions.
|
270 |
+
"""
|
271 |
+
if not isinstance(candidates, list):
|
272 |
+
raise TypeError("Expected a list of candidate dictionaries.")
|
273 |
+
|
274 |
+
summaries = [c.get("summary", "") for c in candidates if "summary" in c]
|
275 |
+
combined_summary = " ".join(summaries)
|
276 |
+
|
277 |
+
prompt = (
|
278 |
+
"Based on the following summary of this top candidate for a job role, generate 5 thoughtful, general interview questions that would help a recruiter assess their fit:\n"
|
279 |
+
f"{combined_summary}"
|
280 |
+
)
|
281 |
+
|
282 |
+
try:
|
283 |
+
response = client.chat_completion(
|
284 |
+
messages=[
|
285 |
+
{"role": "user", "content": prompt}
|
286 |
+
],
|
287 |
+
temperature=0.7,
|
288 |
+
max_tokens=500
|
289 |
+
)
|
290 |
+
result_text = response.choices[0].message.content
|
291 |
+
questions = [q.strip() for q in result_text.split("\n") if q.strip()]
|
292 |
+
return questions[:5] if questions else ["⚠️ No questions generated."]
|
293 |
+
except Exception as e:
|
294 |
+
print(f"❌ Error generating interview questions: {e}")
|
295 |
+
return ["⚠️ Error generating questions."]
|
296 |
+
|