Johnny committed on
Commit
949011b
·
1 Parent(s): c2bc50b

added generate questions function with Gemma, Hugging Face client

Browse files
Files changed (3) hide show
  1. app.py +7 -1
  2. config.py +3 -1
  3. utils.py +60 -7
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import streamlit as st
2
  from utils import (
3
  evaluate_resumes, generate_pdf_report, store_in_supabase, extract_email,
4
- score_candidate, parse_resume, summarize_resume, extract_keywords
5
  )
6
  from config import supabase
7
  from config import HF_API_TOKEN, HF_HEADERS, HF_MODELS
@@ -52,6 +52,12 @@ def main():
52
  # Generate PDF Report
53
  pdf_report = generate_pdf_report(shortlisted)
54
  st.download_button("Download Shortlist Report", pdf_report, "shortlist.pdf")
 
 
 
 
 
 
55
 
56
  # ๐Ÿ”ป Display removed candidates due to missing keywords
57
  if removed_candidates:
 
1
  import streamlit as st
2
  from utils import (
3
  evaluate_resumes, generate_pdf_report, store_in_supabase, extract_email,
4
+ score_candidate, parse_resume, summarize_resume, extract_keywords, generate_interview_questions_from_summaries
5
  )
6
  from config import supabase
7
  from config import HF_API_TOKEN, HF_HEADERS, HF_MODELS
 
52
  # Generate PDF Report
53
  pdf_report = generate_pdf_report(shortlisted)
54
  st.download_button("Download Shortlist Report", pdf_report, "shortlist.pdf")
55
+
56
+ # Generate Interview Questions
57
+ questions = generate_interview_questions_from_summaries(shortlisted)
58
+ st.subheader("๐Ÿง  Suggested Interview Questions:")
59
+ for idx, q in enumerate(questions, 1):
60
+ st.markdown(f"**Q{idx}.** {q}")
61
 
62
  # ๐Ÿ”ป Display removed candidates due to missing keywords
63
  if removed_candidates:
config.py CHANGED
@@ -19,7 +19,9 @@ supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
19
  embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
20
 
21
  HF_MODELS = {
22
- "bart": "https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn"
 
 
23
  }
24
 
25
  HF_API_TOKEN = os.getenv("HF_API_TOKEN")
 
19
  embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
20
 
21
  HF_MODELS = {
22
+ "bart": "https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn",
23
+ "gemma": "https://router.huggingface.co/nebius/v1/chat/completions"
24
+
25
  }
26
 
27
  HF_API_TOKEN = os.getenv("HF_API_TOKEN")
utils.py CHANGED
@@ -15,6 +15,15 @@ from sklearn.feature_extraction.text import TfidfVectorizer
15
  import streamlit as st
16
  from fuzzywuzzy import fuzz
17
  import subprocess
 
 
 
 
 
 
 
 
 
18
 
19
  # These functions will be called in the app.py file
20
 
@@ -28,7 +37,7 @@ except OSError:
28
  def evaluate_resumes(uploaded_files, job_description, min_keyword_match=2):
29
  """Evaluates uploaded resumes, filters by keywords and score, and returns shortlisted candidates."""
30
  candidates = []
31
- removed_candidates = []
32
 
33
  for pdf_file in uploaded_files:
34
  resume_text = parse_resume(pdf_file)
@@ -36,7 +45,6 @@ def evaluate_resumes(uploaded_files, job_description, min_keyword_match=2):
36
  email = extract_email(resume_text)
37
  summary = summarize_resume(resume_text)
38
 
39
- # If score is below 0.20, remove the candidate immediately
40
  if score < 0.20:
41
  removed_candidates.append({"name": pdf_file.name, "reason": "Low confidence score (< 0.20)"})
42
  continue # Skip adding to candidates list
@@ -49,14 +57,22 @@ def evaluate_resumes(uploaded_files, job_description, min_keyword_match=2):
49
  "summary": summary
50
  })
51
 
52
- # Filter resumes based on job description keywords
53
- filtered_candidates, keyword_removed = filter_resumes_by_keywords(candidates, job_description, min_keyword_match=2)
54
 
55
- # Store removed candidates with a reason
56
  for name in keyword_removed:
57
  removed_candidates.append({"name": name, "reason": "Insufficient keyword matches"})
58
 
59
- return sorted(filtered_candidates, key=lambda x: x["score"], reverse=True)[:5], removed_candidates
 
 
 
 
 
 
 
 
60
 
61
  def extract_keywords(text, top_n=10):
62
  """Extracts key terms from the job description using TF-IDF and spaCy."""
@@ -212,6 +228,10 @@ def generate_pdf_report(shortlisted_candidates):
212
 
213
  # Use stored summary, or provide a fallback
214
  summary = candidate.get("summary", "No summary available")
 
 
 
 
215
 
216
  # Define text area properties
217
  text_box_x = 50 # Left margin
@@ -226,6 +246,7 @@ def generate_pdf_report(shortlisted_candidates):
226
  f"Email: {candidate['email']}\n"
227
  f"Score: {candidate['score']}\n\n"
228
  f"Summary:\n{summary}"
 
229
  )
230
 
231
  # Check if the text fits in the allowed area
@@ -240,4 +261,36 @@ def generate_pdf_report(shortlisted_candidates):
240
 
241
  doc.save(pdf)
242
  pdf.seek(0)
243
- return pdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  import streamlit as st
16
  from fuzzywuzzy import fuzz
17
  import subprocess
18
+ import random
19
+ from huggingface_hub import InferenceClient
20
+ import os
21
+
22
+ # Initialize the client
23
+ client = InferenceClient(
24
+ model="google/gemma-1.1-7b-it",
25
+ token=HF_API_TOKEN
26
+ )
27
 
28
  # These functions will be called in the app.py file
29
 
 
37
  def evaluate_resumes(uploaded_files, job_description, min_keyword_match=2):
38
  """Evaluates uploaded resumes, filters by keywords and score, and returns shortlisted candidates."""
39
  candidates = []
40
+ removed_candidates = []
41
 
42
  for pdf_file in uploaded_files:
43
  resume_text = parse_resume(pdf_file)
 
45
  email = extract_email(resume_text)
46
  summary = summarize_resume(resume_text)
47
 
 
48
  if score < 0.20:
49
  removed_candidates.append({"name": pdf_file.name, "reason": "Low confidence score (< 0.20)"})
50
  continue # Skip adding to candidates list
 
57
  "summary": summary
58
  })
59
 
60
+ # ๐Ÿ”น Step 2: Filter candidates based on keyword matches
61
+ filtered_candidates, keyword_removed = filter_resumes_by_keywords(candidates, job_description, min_keyword_match)
62
 
63
+ # ๐Ÿ”น Step 3: Log removed candidates
64
  for name in keyword_removed:
65
  removed_candidates.append({"name": name, "reason": "Insufficient keyword matches"})
66
 
67
+ # ๐Ÿ”น Step 4: Ensure the final list is sorted by score and limit to top 5 candidates
68
+ shortlisted_candidates = sorted(filtered_candidates, key=lambda x: x["score"], reverse=True)[:5]
69
+
70
+ # ๐Ÿ”น Step 5: Ensure return value is always a list
71
+ if not isinstance(shortlisted_candidates, list):
72
+ print("โš ๏ธ ERROR: shortlisted_candidates is not a list! Returning empty list.")
73
+ return [], removed_candidates
74
+
75
+ return shortlisted_candidates, removed_candidates
76
 
77
  def extract_keywords(text, top_n=10):
78
  """Extracts key terms from the job description using TF-IDF and spaCy."""
 
228
 
229
  # Use stored summary, or provide a fallback
230
  summary = candidate.get("summary", "No summary available")
231
+
232
+ # Generate interview questions
233
+ #questions = generate_interview_questions_from_summaries(summary)
234
+ #questions_text = "\n".join([f"- {q}" for q in questions])
235
 
236
  # Define text area properties
237
  text_box_x = 50 # Left margin
 
246
  f"Email: {candidate['email']}\n"
247
  f"Score: {candidate['score']}\n\n"
248
  f"Summary:\n{summary}"
249
+ #f"Suggested Interview Questions:\n{questions_text}"
250
  )
251
 
252
  # Check if the text fits in the allowed area
 
261
 
262
  doc.save(pdf)
263
  pdf.seek(0)
264
+ return pdf
265
+
266
+ def generate_interview_questions_from_summaries(candidates):
267
+ """
268
+ Generates common interview questions based on the combined summaries of shortlisted candidates.
269
+ Uses the Hugging Face Gemma model to generate questions.
270
+ """
271
+ if not isinstance(candidates, list):
272
+ raise TypeError("Expected a list of candidate dictionaries.")
273
+
274
+ summaries = [c.get("summary", "") for c in candidates if "summary" in c]
275
+ combined_summary = " ".join(summaries)
276
+
277
+ prompt = (
278
+ "Based on the following summary of this top candidate for a job role, generate 5 thoughtful, general interview questions that would help a recruiter assess their fit:\n"
279
+ f"{combined_summary}"
280
+ )
281
+
282
+ try:
283
+ response = client.chat_completion(
284
+ messages=[
285
+ {"role": "user", "content": prompt}
286
+ ],
287
+ temperature=0.7,
288
+ max_tokens=500
289
+ )
290
+ result_text = response.choices[0].message.content
291
+ questions = [q.strip() for q in result_text.split("\n") if q.strip()]
292
+ return questions[:5] if questions else ["โš ๏ธ No questions generated."]
293
+ except Exception as e:
294
+ print(f"โŒ Error generating interview questions: {e}")
295
+ return ["โš ๏ธ Error generating questions."]
296
+