Johnny committed on
Commit
cca9b28
·
1 Parent(s): 2854e2c

updated summarize_resume to pegasus

Browse files
Files changed (2) hide show
  1. config.py +3 -3
  2. utils.py +37 -22
config.py CHANGED
@@ -19,7 +19,7 @@ supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
19
  embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
20
 
21
  HF_MODELS = {
22
- "bart": "https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn",
23
  "gemma": "https://router.huggingface.co/nebius/v1/chat/completions"
24
 
25
  }
@@ -30,12 +30,12 @@ if not HF_API_TOKEN:
30
 
31
  HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}
32
 
33
- def query(payload, model="bart", retries=5, delay=5):
34
  """
35
  Sends a request to the Hugging Face API with retries and better error handling.
36
  """
37
  if model not in HF_MODELS:
38
- raise ValueError(f"Invalid model name: {model}. Choose 'bart' for summarization.")
39
 
40
  api_url = HF_MODELS[model]
41
 
 
19
  embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
20
 
21
  HF_MODELS = {
22
+ "pegasus": "https://router.huggingface.co/hf-inference/models/google/pegasus-xsum",
23
  "gemma": "https://router.huggingface.co/nebius/v1/chat/completions"
24
 
25
  }
 
30
 
31
  HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}
32
 
33
+ def query(payload, model="pegasus", retries=5, delay=5):
34
  """
35
  Sends a request to the Hugging Face API with retries and better error handling.
36
  """
37
  if model not in HF_MODELS:
38
+ raise ValueError(f"Invalid model name: {model}. Choose 'pegasus' for summarization.")
39
 
40
  api_url = HF_MODELS[model]
41
 
utils.py CHANGED
@@ -163,25 +163,26 @@ def extract_email(resume_text):
163
  match = re.search(r"[\w\.-]+@[\w\.-]+", resume_text)
164
  return match.group(0) if match else None
165
 
166
-
167
  def summarize_resume(resume_text):
168
- """
169
- Generates a summary of the resume using Hugging Face BART.
170
- """
171
- payload = {"inputs": f"Summarize this resume: {resume_text}"}
172
- response = query(payload, model="bart")
173
-
174
- if not response:
175
- return "Summary unavailable due to API issues."
176
 
177
  try:
178
- response = response[0] if isinstance(response, list) else response
179
- summary = response.get("generated_text") or response.get("summary_text")
180
-
181
- return summary.strip() if summary else "Summary unavailable."
 
 
 
182
  except Exception as e:
183
- print(f"Error parsing summary: {e}")
184
- return "Summary unavailable."
185
 
186
 
187
  # === Data Storage & Reporting ===
@@ -229,17 +230,14 @@ def generate_pdf_report(shortlisted_candidates, questions=None):
229
 
230
 
231
  def generate_interview_questions_from_summaries(candidates):
232
- """
233
- Generates 5 interview questions based on combined summaries using Gemma model.
234
- """
235
  if not isinstance(candidates, list):
236
  raise TypeError("Expected a list of candidate dictionaries.")
237
 
238
  summaries = " ".join(c.get("summary", "") for c in candidates)
239
 
240
  prompt = (
241
- "Based on the following summary of this top candidate for a job role, "
242
- "generate 5 thoughtful, general interview questions that would help a recruiter assess their fit:\n"
243
  f"{summaries}"
244
  )
245
 
@@ -251,10 +249,27 @@ def generate_interview_questions_from_summaries(candidates):
251
  )
252
  result = response.choices[0].message.content
253
 
254
- questions = [re.sub(r"^(?:\*\*)?(Q?\d+[\.\)\-]?\s*)+(?:\*\*)?", "", q.strip())
255
- for q in result.split("\n") if q.strip()]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
 
257
  return [f"Q{i+1}. {q}" for i, q in enumerate(questions[:5])] or ["⚠️ No questions generated."]
 
258
  except Exception as e:
259
  print(f"❌ Error generating interview questions: {e}")
260
  return ["⚠️ Error generating questions."]
 
163
  match = re.search(r"[\w\.-]+@[\w\.-]+", resume_text)
164
  return match.group(0) if match else None
165
 
 
166
  def summarize_resume(resume_text):
167
+ prompt = (
168
+ "You are an expert technical recruiter. Extract a professional summary for this candidate based on their resume text. "
169
+ "Include: full name (if found), job title, years of experience, key technologies/tools, industries worked in, and certifications. "
170
+ "Format it as a professional summary paragraph.\n\n"
171
+ f"Resume:\n{resume_text}\n\n"
172
+ "Summary:"
173
+ )
 
174
 
175
  try:
176
+ response = client.chat_completion(
177
+ messages=[{"role": "user", "content": prompt}],
178
+ temperature=0.5,
179
+ max_tokens=300,
180
+ )
181
+ result = response.choices[0].message.content.strip()
182
+ return result
183
  except Exception as e:
184
+ print(f"Error generating structured summary: {e}")
185
+ return "Summary unavailable due to API issues."
186
 
187
 
188
  # === Data Storage & Reporting ===
 
230
 
231
 
232
  def generate_interview_questions_from_summaries(candidates):
 
 
 
233
  if not isinstance(candidates, list):
234
  raise TypeError("Expected a list of candidate dictionaries.")
235
 
236
  summaries = " ".join(c.get("summary", "") for c in candidates)
237
 
238
  prompt = (
239
+ "Based on the following summary of a top candidate for a job role, "
240
+ "generate 5 thoughtful, general interview questions that would help a recruiter assess their fit:\n\n"
241
  f"{summaries}"
242
  )
243
 
 
249
  )
250
  result = response.choices[0].message.content
251
 
252
+ # Clean and normalize questions
253
+ raw_questions = result.split("\n")
254
+ questions = []
255
+
256
+ for q in raw_questions:
257
+ q = q.strip()
258
+
259
+ # Skip empty lines and markdown headers
260
+ if not q or re.match(r"^#+\s*", q):
261
+ continue
262
+
263
+ # Remove leading "Q1.", "1)", etc.
264
+ q = re.sub(r"^(?:Q?\d+[\.\)\-]?\s*)+", "", q)
265
+
266
+ # Remove markdown bold/italics (**, *, etc.)
267
+ q = re.sub(r"[*_]+", "", q)
268
+
269
+ questions.append(q.strip())
270
 
271
  return [f"Q{i+1}. {q}" for i, q in enumerate(questions[:5])] or ["⚠️ No questions generated."]
272
+
273
  except Exception as e:
274
  print(f"❌ Error generating interview questions: {e}")
275
  return ["⚠️ Error generating questions."]