Johnny committed
Commit 2854e2c · 1 Parent(s): 949011b

added interview questions to pdf, refactor question format

Files changed (2)
  1. app.py +37 -22
  2. utils.py +129 -165
app.py CHANGED
@@ -1,16 +1,25 @@
+import os
+from io import BytesIO
+
 import streamlit as st
-from utils import (
-    evaluate_resumes, generate_pdf_report, store_in_supabase, extract_email,
-    score_candidate, parse_resume, summarize_resume, extract_keywords, generate_interview_questions_from_summaries
-)
-from config import supabase
-from config import HF_API_TOKEN, HF_HEADERS, HF_MODELS
 import fitz # PyMuPDF
-from io import BytesIO
-from dotenv import load_dotenv
-import os
 import requests
+from dotenv import load_dotenv
 
+from config import supabase, HF_API_TOKEN, HF_HEADERS, HF_MODELS
+from utils import (
+    evaluate_resumes,
+    generate_pdf_report,
+    store_in_supabase,
+    extract_email,
+    score_candidate,
+    parse_resume,
+    summarize_resume,
+    extract_keywords,
+    generate_interview_questions_from_summaries,
+)
+
+# ------------------------- Main App Function -------------------------
 def main():
     st.set_page_config(page_title="TalentLens.AI", layout="centered")
 
@@ -18,28 +27,33 @@ def main():
     st.divider()
     st.markdown("<h3 style='text-align: center;'>AI-Powered Intelligent Resume Screening</h3>", unsafe_allow_html=True)
 
-    # Limit resume uploads to 10
-    uploaded_files = st.file_uploader("Upload Resumes (PDF Only, Max: 10)", accept_multiple_files=True, type=["pdf"])
+    # Upload resumes (limit: 10 files)
+    uploaded_files = st.file_uploader(
+        "Upload Resumes (PDF Only, Max: 10)",
+        accept_multiple_files=True,
+        type=["pdf"]
+    )
+
     if uploaded_files and len(uploaded_files) > 10:
         st.error("⚠️ You can upload a maximum of 10 resumes at a time.")
         return
 
+    # Input job description
     job_description = st.text_area("Enter Job Description")
 
+    # Evaluation trigger
    if st.button("Evaluate Resumes"):
        if not job_description:
            st.error("⚠️ Please enter a job description.")
            return
+
        if not uploaded_files:
            st.error("⚠️ Please upload at least one resume.")
            return
-        #if uploaded_files and job_description:
+
        st.write("### 📊 Evaluating Resumes...")
-
-        # 🔹 Extract required keywords dynamically from the job description
-        # required_keywords = extract_keywords(job_description)
-        # st.write(f"**Extracted Keywords:** {', '.join(required_keywords)}")
 
+        # Resume Evaluation
        shortlisted, removed_candidates = evaluate_resumes(uploaded_files, job_description)
 
        if not shortlisted:
@@ -49,21 +63,22 @@ def main():
        for candidate in shortlisted:
            st.write(f"**{candidate['name']}**")
 
-        # Generate PDF Report
-        pdf_report = generate_pdf_report(shortlisted)
-        st.download_button("Download Shortlist Report", pdf_report, "shortlist.pdf")
-
        # Generate Interview Questions
        questions = generate_interview_questions_from_summaries(shortlisted)
        st.subheader("🧠 Suggested Interview Questions:")
        for idx, q in enumerate(questions, 1):
-            st.markdown(f"**Q{idx}.** {q}")
+            st.markdown(f"{q}")
+
+        # Downloadable PDF Report
+        pdf_report = generate_pdf_report(shortlisted, questions)
+        st.download_button("Download Shortlist Report", pdf_report, "shortlist.pdf")
 
-        # 🔻 Display removed candidates due to missing keywords
+        # Removed Candidates Info
        if removed_candidates:
            st.subheader("❌ Resumes Removed:")
            for removed in removed_candidates:
                st.write(f"**{removed['name']}** - {removed['reason']}")
 
+# ------------------------- Run the App -------------------------
 if __name__ == "__main__":
     main()
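
Note on the app-side change: main() now generates the interview questions first and passes them into generate_pdf_report(), so the questions end up in the downloadable PDF. A minimal sketch of that new call order is below; the candidate dictionary is an invented placeholder, and running it assumes the project's config module (Supabase and Hugging Face credentials) is available, since utils loads those at import time and the question generation calls the hosted Gemma endpoint.

# Sketch only: exercises the new generate_pdf_report(shortlisted, questions) signature
# outside Streamlit. The candidate data below is illustrative, not from the repo.
from utils import generate_interview_questions_from_summaries, generate_pdf_report

shortlisted = [{
    "name": "resume_a.pdf",
    "email": "a@example.com",
    "score": 0.71,
    "summary": "Backend engineer with five years of Python, SQL, and AWS experience.",
}]

questions = generate_interview_questions_from_summaries(shortlisted)  # calls the Gemma endpoint
pdf_report = generate_pdf_report(shortlisted, questions)              # questions now go into the PDF

with open("shortlist.pdf", "wb") as f:
    f.write(pdf_report.getvalue())  # generate_pdf_report returns a BytesIO buffer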
utils.py CHANGED
@@ -1,43 +1,53 @@
-import fitz # PyMuPDF for PDF processing
-import requests
-import json
+# === Imports ===
+
+# Standard Library
+import os
 import re
+import json
+import random
+import subprocess
 from io import BytesIO
-import supabase
-from config import (
-    SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_HEADERS,
-    supabase, HF_MODELS, query, embedding_model
-)
-from sentence_transformers import SentenceTransformer, util
-import spacy
 from collections import Counter
-from sklearn.feature_extraction.text import TfidfVectorizer
+
+# Third-Party Libraries
+import fitz # PyMuPDF
+import requests
+import spacy
 import streamlit as st
 from fuzzywuzzy import fuzz
-import subprocess
-import random
+from sentence_transformers import SentenceTransformer, util
+from sklearn.feature_extraction.text import TfidfVectorizer
 from huggingface_hub import InferenceClient
-import os
 
-# Initialize the client
+# Local Configuration
+from config import (
+    SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_HEADERS,
+    supabase, HF_MODELS, query, embedding_model
+)
+
+# === Initialization ===
+
+# Hugging Face inference client for Gemma model
 client = InferenceClient(
     model="google/gemma-1.1-7b-it",
     token=HF_API_TOKEN
 )
 
-# These functions will be called in the app.py file
-
-# Load spaCy NLP model
+# Load or download spaCy model
 try:
     nlp = spacy.load("en_core_web_sm")
 except OSError:
     subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
     nlp = spacy.load("en_core_web_sm")
 
+
+# === Core Resume Evaluation ===
+
 def evaluate_resumes(uploaded_files, job_description, min_keyword_match=2):
-    """Evaluates uploaded resumes, filters by keywords and score, and returns shortlisted candidates."""
-    candidates = []
-    removed_candidates = []
+    """
+    Evaluate uploaded resumes and return shortlisted candidates with scores and summaries.
+    """
+    candidates, removed_candidates = [], []
 
     for pdf_file in uploaded_files:
         resume_text = parse_resume(pdf_file)
@@ -47,7 +57,7 @@ def evaluate_resumes(uploaded_files, job_description, min_keyword_match=2):
 
         if score < 0.20:
             removed_candidates.append({"name": pdf_file.name, "reason": "Low confidence score (< 0.20)"})
-            continue # Skip adding to candidates list
+            continue
 
         candidates.append({
             "name": pdf_file.name,
@@ -57,240 +67,194 @@ def evaluate_resumes(uploaded_files, job_description, min_keyword_match=2):
             "summary": summary
         })
 
-    # 🔹 Step 2: Filter candidates based on keyword matches
-    filtered_candidates, keyword_removed = filter_resumes_by_keywords(candidates, job_description, min_keyword_match)
+    filtered_candidates, keyword_removed = filter_resumes_by_keywords(
+        candidates, job_description, min_keyword_match
+    )
 
-    # 🔹 Step 3: Log removed candidates
     for name in keyword_removed:
         removed_candidates.append({"name": name, "reason": "Insufficient keyword matches"})
 
-    # 🔹 Step 4: Ensure the final list is sorted by score and limit to top 5 candidates
-    shortlisted_candidates = sorted(filtered_candidates, key=lambda x: x["score"], reverse=True)[:5]
-
-    # 🔹 Step 5: Ensure return value is always a list
-    if not isinstance(shortlisted_candidates, list):
-        print("⚠️ ERROR: shortlisted_candidates is not a list! Returning empty list.")
-        return [], removed_candidates
-
-    return shortlisted_candidates, removed_candidates
+    shortlisted = sorted(filtered_candidates, key=lambda x: x["score"], reverse=True)[:5]
+
+    return shortlisted if isinstance(shortlisted, list) else [], removed_candidates
 
+
+# === Keyword & Scoring Functions ===
 
 def extract_keywords(text, top_n=10):
-    """Extracts key terms from the job description using TF-IDF and spaCy."""
-    if not text.strip(): # Handle empty job descriptions
+    """
+    Extracts top keywords from the job description using spaCy and TF-IDF.
+    """
+    if not text.strip():
         return []
 
     doc = nlp(text.lower())
+    keywords = [t.text for t in doc if t.pos_ in {"NOUN", "PROPN", "VERB", "ADJ"} and not t.is_stop]
 
-    # Extract meaningful words (nouns, proper nouns, verbs, adjectives)
-    keywords = [token.text for token in doc if token.pos_ in {"NOUN", "PROPN", "VERB", "ADJ"} and not token.is_stop]
-
-    if not keywords: # If no valid keywords were found, return an empty list
+    if not keywords:
         return []
 
-    # Use TF-IDF to rank keywords
-    vectorizer = TfidfVectorizer(stop_words="english", ngram_range=(1, 2))
-
     try:
-        tfidf_matrix = vectorizer.fit_transform([" ".join(keywords)])
-        feature_array = vectorizer.get_feature_names_out()
-        tfidf_scores = tfidf_matrix.toarray()[0]
-
-        # Sort by highest TF-IDF scores
-        keyword_scores = sorted(zip(feature_array, tfidf_scores), key=lambda x: x[1], reverse=True)
-
-        return [kw for kw, score in keyword_scores[:top_n]]
-
-    except ValueError: # Catch empty vocabulary error
+        tfidf = TfidfVectorizer(stop_words="english", ngram_range=(1, 2))
+        matrix = tfidf.fit_transform([" ".join(keywords)])
+        scores = matrix.toarray()[0]
+        features = tfidf.get_feature_names_out()
+        ranked = sorted(zip(features, scores), key=lambda x: x[1], reverse=True)
+
+        return [kw for kw, _ in ranked[:top_n]]
+
+    except ValueError:
         return []
 
+
 def filter_resumes_by_keywords(resumes, job_description, min_keyword_match=2):
-    """Filters resumes based on keyword presence and similarity."""
+    """
+    Filters resumes by keyword match using fuzzy logic.
+    """
     job_keywords = extract_keywords(job_description)
-    filtered_resumes = []
-    removed_resumes = []
-
     if len(job_keywords) < min_keyword_match:
-        st.warning("⚠️ Job description is either too short or absent for keyword filtering.")
-        return resumes, [] # Skip keyword filtering if job description lacks enough keywords
+        st.warning("⚠️ Job description too short or missing for keyword filtering.")
+        return resumes, []
+
+    filtered, removed = [], []
 
     for resume in resumes:
-        resume_text = resume["resume"].lower()
-        matched_keywords = []
-
-        # Apply fuzzy matching to allow flexible keyword detection
-        for keyword in job_keywords:
-            for word in resume_text.split():
-                if fuzz.partial_ratio(keyword, word) > 80: # 80% similarity threshold
-                    matched_keywords.append(keyword)
-
-        # Enforce minimum keyword matches
-        if len(set(matched_keywords)) >= min_keyword_match:
-            filtered_resumes.append(resume)
-        else:
-            removed_resumes.append(resume["name"])
+        matched = {
+            keyword for keyword in job_keywords
+            if any(fuzz.partial_ratio(keyword, word) > 80 for word in resume["resume"].lower().split())
+        }
 
-    return filtered_resumes, removed_resumes
+        if len(matched) >= min_keyword_match:
+            filtered.append(resume)
+        else:
+            removed.append(resume["name"])
+
+    return filtered, removed
 
 
-def parse_resume(pdf_file):
-    """Extracts text from a resume PDF."""
-    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
-    text = "\n".join([page.get_text("text") for page in doc])
-    return text
-
-def extract_email(resume_text):
-    """Extracts an email address from resume text."""
-    match = re.search(r"[\w\.-]+@[\w\.-]+", resume_text)
-    return match.group(0) if match else None
-
 def score_candidate(resume_text, job_description):
     """
-    Scores the candidate's resume based on the job description using sentence-transformers.
-
-    :param resume_text: The extracted resume text.
-    :param job_description: The job description for comparison.
-    :return: A numerical score (cosine similarity between 0 and 1).
+    Computes cosine similarity between resume and job description using embeddings.
     """
     try:
-        # Generate embeddings
-        resume_embedding = embedding_model.encode(resume_text, convert_to_tensor=True)
-        job_embedding = embedding_model.encode(job_description, convert_to_tensor=True)
-
-        # Compute cosine similarity
-        score = util.pytorch_cos_sim(resume_embedding, job_embedding).item()
-
-        return round(score, 4) # Return similarity score rounded to 4 decimal places
-
-    except Exception as e:
-        print(f"Error computing similarity score: {e}")
-        return 0 # Return 0 if scoring fails
+        resume_vec = embedding_model.encode(resume_text, convert_to_tensor=True)
+        job_vec = embedding_model.encode(job_description, convert_to_tensor=True)
+        score = util.pytorch_cos_sim(resume_vec, job_vec).item()
+        return round(score, 4)
+    except Exception as e:
+        print(f"Error computing similarity: {e}")
+        return 0
 
+
+# === Text Extraction & Summarization ===
+
+def parse_resume(pdf_file):
+    """
+    Extracts raw text from a PDF file.
+    """
+    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
+    return "\n".join([page.get_text("text") for page in doc])
+
+
+def extract_email(resume_text):
+    """
+    Extracts the first valid email found in text.
+    """
+    match = re.search(r"[\w\.-]+@[\w\.-]+", resume_text)
+    return match.group(0) if match else None
 
 def summarize_resume(resume_text):
     """
-    Summarizes a resume using the Hugging Face BART model with improved error handling.
+    Generates a summary of the resume using Hugging Face BART.
     """
     payload = {"inputs": f"Summarize this resume: {resume_text}"}
     response = query(payload, model="bart")
 
     if not response:
-        print("⚠️ Error: API response is None. Returning fallback summary.")
         return "Summary unavailable due to API issues."
 
     try:
-        if isinstance(response, list) and len(response) > 0:
-            response = response[0]
-
-        if isinstance(response, dict):
-            summary = response.get("generated_text") or response.get("summary_text")
-
-            if summary:
-                return summary.strip()
-            else:
-                print("⚠️ Unexpected API response format:", response)
-                return "Summary unavailable."
+        response = response[0] if isinstance(response, list) else response
+        summary = response.get("generated_text") or response.get("summary_text")
+
+        return summary.strip() if summary else "Summary unavailable."
     except Exception as e:
-        print(f"⚠️ Error parsing summary: {e}")
+        print(f"Error parsing summary: {e}")
         return "Summary unavailable."
 
-    return "Summary unavailable."
+
+# === Data Storage & Reporting ===
 
 def store_in_supabase(resume_text, score, candidate_name, email, summary):
     """
-    Stores resume data in Supabase.
-
-    :param resume_text: The extracted resume text.
-    :param score: The candidate's score (must be a valid number).
-    :param candidate_name: The candidate's name.
-    :param email: Candidate's email address.
-    :param summary: A summarized version of the resume.
+    Saves candidate data to the Supabase table.
     """
-    if score is None:
-        score = 0 # Ensure score is never NULL
-
     data = {
         "name": candidate_name,
         "resume": resume_text,
-        "score": score,
+        "score": score or 0,
         "email": email,
         "summary": summary
     }
 
-    response = supabase.table("candidates").insert(data).execute()
-    return response
+    return supabase.table("candidates").insert(data).execute()
 
-def generate_pdf_report(shortlisted_candidates):
-    """Generates a PDF summary of shortlisted candidates with proper text wrapping."""
+
+def generate_pdf_report(shortlisted_candidates, questions=None):
+    """
+    Creates a PDF report summarizing top candidates and interview questions.
+    """
     pdf = BytesIO()
     doc = fitz.open()
 
     for candidate in shortlisted_candidates:
         page = doc.new_page()
-
-        # Use stored summary, or provide a fallback
-        summary = candidate.get("summary", "No summary available")
-
-        # Generate interview questions
-        #questions = generate_interview_questions_from_summaries(summary)
-        #questions_text = "\n".join([f"- {q}" for q in questions])
-
-        # Define text area properties
-        text_box_x = 50 # Left margin
-        text_box_y = 50 # Top margin
-        text_box_width = 500 # Max width before wrapping
-        text_box_height = 700 # Max height before splitting to a new page
-        font_size = 11 # Font size for better readability
-
-        # Format candidate details
-        candidate_info = (
+        info = (
            f"Candidate: {candidate['name']}\n"
            f"Email: {candidate['email']}\n"
            f"Score: {candidate['score']}\n\n"
-            f"Summary:\n{summary}"
-            #f"Suggested Interview Questions:\n{questions_text}"
+            f"Summary:\n{candidate.get('summary', 'No summary available')}"
        )
+        page.insert_textbox(fitz.Rect(50, 50, 550, 750), info, fontsize=11, fontname="helv", align=0)
 
-        # Check if the text fits in the allowed area
-        text_rect = fitz.Rect(text_box_x, text_box_y, text_box_x + text_box_width, text_box_y + text_box_height)
-        text_length = page.insert_textbox(text_rect, candidate_info, fontsize=font_size, fontname="helv", align=0)
-
-        # If text overflows, split across multiple pages
-        while text_length == 0: # 0 means text didn't fit
-            page = doc.new_page() # Create new page
-            text_rect = fitz.Rect(text_box_x, text_box_y, text_box_x + text_box_width, text_box_y + text_box_height)
-            text_length = page.insert_textbox(text_rect, candidate_info, fontsize=font_size, fontname="helv", align=0)
+    if questions:
+        q_page = doc.new_page()
+        q_text = "Suggested Interview Questions:\n\n" + "\n".join(questions)
+        q_page.insert_textbox(fitz.Rect(50, 50, 550, 750), q_text, fontsize=11, fontname="helv", align=0)
 
     doc.save(pdf)
     pdf.seek(0)
     return pdf
 
+
 def generate_interview_questions_from_summaries(candidates):
     """
-    Generates common interview questions based on the combined summaries of shortlisted candidates.
-    Uses the Hugging Face Gemma model to generate questions.
+    Generates 5 interview questions based on combined summaries using Gemma model.
    """
    if not isinstance(candidates, list):
        raise TypeError("Expected a list of candidate dictionaries.")
 
-    summaries = [c.get("summary", "") for c in candidates if "summary" in c]
-    combined_summary = " ".join(summaries)
+    summaries = " ".join(c.get("summary", "") for c in candidates)
 
    prompt = (
-        "Based on the following summary of this top candidate for a job role, generate 5 thoughtful, general interview questions that would help a recruiter assess their fit:\n"
-        f"{combined_summary}"
+        "Based on the following summary of this top candidate for a job role, "
+        "generate 5 thoughtful, general interview questions that would help a recruiter assess their fit:\n"
+        f"{summaries}"
    )
 
    try:
        response = client.chat_completion(
-            messages=[
-                {"role": "user", "content": prompt}
-            ],
+            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=500
        )
-        result_text = response.choices[0].message.content
-        questions = [q.strip() for q in result_text.split("\n") if q.strip()]
-        return questions[:5] if questions else ["⚠️ No questions generated."]
+        result = response.choices[0].message.content
+
+        questions = [re.sub(r"^(?:\*\*)?(Q?\d+[\.\)\-]?\s*)+(?:\*\*)?", "", q.strip())
+                     for q in result.split("\n") if q.strip()]
+
+        return [f"Q{i+1}. {q}" for i, q in enumerate(questions[:5])] or ["⚠️ No questions generated."]
    except Exception as e:
        print(f"❌ Error generating interview questions: {e}")
-        return ["⚠️ Error generating questions."]
-
+        return ["⚠️ Error generating questions."]