Johnny committed on
Commit
1131989
·
1 Parent(s): 8f8f414

updated summarization with validations, update generate_pdf to wrap around text

Browse files
Files changed (2) hide show
  1. config.py +15 -17
  2. utils.py +40 -21
config.py CHANGED
@@ -19,7 +19,7 @@ supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
19
  embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
20
 
21
  HF_MODELS = {
22
- "bart": "https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn"
23
  }
24
 
25
  HF_API_TOKEN = os.getenv("HF_API_TOKEN")
@@ -28,44 +28,42 @@ if not HF_API_TOKEN:
28
 
29
  HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}
30
 
31
- def query(payload, model="bart", retries=3, delay=5):
32
  """
33
- Sends a request to the Hugging Face API with retries.
34
-
35
- :param payload: The input data for inference.
36
- :param model: The model name ('bart' for summarization).
37
- :param retries: Number of times to retry if the request fails.
38
- :param delay: Delay in seconds before retrying.
39
- :return: The model's response in JSON format, or None if all retries fail.
40
  """
41
  if model not in HF_MODELS:
42
- raise ValueError("Invalid model name. Choose 'bart' for summarization.")
43
 
44
  api_url = HF_MODELS[model]
45
 
46
  for attempt in range(retries):
47
  try:
48
- response = requests.post(api_url, headers=HF_HEADERS, json=payload)
49
 
50
  if response.status_code == 401:
51
- print(f"Error querying Hugging Face model '{model}': 401 Unauthorized. Check API key.")
52
  return None
53
 
54
  if response.status_code == 402:
55
- print(f"Error querying Hugging Face model '{model}': 402 Payment Required. Free tier may not support this model.")
56
  return None
57
 
58
- if response.status_code == 500:
59
- print(f"Server error (500) on attempt {attempt + 1}. Retrying in {delay} seconds...")
60
  time.sleep(delay)
61
  continue
62
 
63
  response.raise_for_status()
64
  return response.json()
65
 
 
 
 
 
66
  except requests.exceptions.RequestException as e:
67
- print(f"Error querying Hugging Face model '{model}': {e}")
68
  time.sleep(delay)
69
 
70
- print("All retry attempts failed.")
71
  return None
 
19
  embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
20
 
21
  HF_MODELS = {
22
+ "bart": "https://router.huggingface.co/hf-inference/models/transformer3/H2-keywordextractor"
23
  }
24
 
25
  HF_API_TOKEN = os.getenv("HF_API_TOKEN")
 
28
 
29
  HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}
30
 
31
def query(payload, model="bart", retries=5, delay=5, timeout=10):
    """
    Sends a request to the Hugging Face API with retries and better error handling.

    :param payload: JSON-serializable request body; posted unchanged.
    :param model: Key into HF_MODELS selecting the endpoint URL.
    :param retries: Maximum number of attempts before giving up.
    :param delay: Seconds to wait between attempts.
    :param timeout: Per-request timeout in seconds passed to requests.post.
    :return: The decoded JSON response, or None if no attempt succeeded.
    :raises ValueError: If ``model`` is not a key of HF_MODELS.
    """
    if model not in HF_MODELS:
        raise ValueError(f"Invalid model name: {model}. Choose 'bart' for summarization.")

    api_url = HF_MODELS[model]

    for attempt in range(retries):
        # Sleeping after the final attempt only delays the failure report.
        is_last_attempt = attempt == retries - 1

        try:
            response = requests.post(api_url, headers=HF_HEADERS, json=payload, timeout=timeout)

            # Credential and billing problems will not fix themselves: fail fast.
            if response.status_code == 401:
                print(" API Key issue: Check HF_API_TOKEN. Unauthorized (401).")
                return None

            if response.status_code == 402:
                print("💰 Payment Required (402). Free tier may not support this model.")
                return None

            if response.status_code in (500, 503):
                print(f"⚠️ Server error ({response.status_code}) on attempt {attempt + 1}. Retrying in {delay} seconds...")
                if not is_last_attempt:
                    time.sleep(delay)
                continue

            response.raise_for_status()
            return response.json()

        except requests.exceptions.Timeout:
            print(f"⏳ Timeout error on attempt {attempt + 1}. Retrying...")

        except requests.exceptions.HTTPError as e:
            print(f" API Request Failed: {e}")
            status = e.response.status_code if e.response is not None else None
            # Client errors other than rate limiting (429) are deterministic for
            # a given request, so retrying would only waste time.
            if status is not None and 400 <= status < 500 and status != 429:
                return None

        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # response.json() does not raise a RequestException subclass.
            print(f" API Request Failed: {e}")

        if not is_last_attempt:
            time.sleep(delay)

    print("🚨 All retry attempts failed.")
    return None
utils.py CHANGED
@@ -145,30 +145,33 @@ def score_candidate(resume_text, job_description):
145
 
146
  def summarize_resume(resume_text):
147
  """
148
- Summarizes a resume using the Hugging Face BART model.
149
  """
150
  payload = {"inputs": f"Summarize this resume: {resume_text}"}
151
- response = query(payload, model="bart") # Call API
152
 
153
  if not response:
154
- print("Error: API response is None")
155
- return "Summary could not be generated."
156
 
157
  try:
158
- # Check if response is a list (sometimes HF returns a list with a dict inside)
159
  if isinstance(response, list) and len(response) > 0:
160
  response = response[0]
161
 
162
- # Adjust for different response formats
163
- if isinstance(response, dict) and ("generated_text" in response or "summary_text" in response):
164
- return response.get("generated_text", response.get("summary_text", "Summary not available"))
165
- else:
166
- print("Unexpected API response format:", response)
167
- return "Summary could not be generated."
 
 
 
 
 
 
168
 
169
- except (TypeError, ValueError) as e:
170
- print(f"Error parsing summary: {e}")
171
- return "Summary could not be generated."
172
 
173
  def store_in_supabase(resume_text, score, candidate_name, email, summary):
174
  """
@@ -194,26 +197,42 @@ def store_in_supabase(resume_text, score, candidate_name, email, summary):
194
  response = supabase.table("candidates").insert(data).execute()
195
  return response
196
 
197
- # Test with 10 resumes, if they will be shortlisted
198
  def generate_pdf_report(shortlisted_candidates):
199
- """Generates a PDF summary of shortlisted candidates."""
200
  pdf = BytesIO()
201
  doc = fitz.open()
202
 
203
  for candidate in shortlisted_candidates:
204
  page = doc.new_page()
205
 
206
- # Use the stored summary, or provide a fallback
207
  summary = candidate.get("summary", "No summary available")
208
 
209
- page.insert_text(
210
- (50, 50),
 
 
 
 
 
 
 
211
  f"Candidate: {candidate['name']}\n"
212
  f"Email: {candidate['email']}\n"
213
- f"Score: {candidate['score']}\n"
214
- f"Summary: {summary}"
215
  )
216
 
 
 
 
 
 
 
 
 
 
 
217
  doc.save(pdf)
218
  pdf.seek(0)
219
  return pdf
 
145
 
146
def summarize_resume(resume_text):
    """
    Summarizes a resume through the Hugging Face endpoint registered as "bart".

    The resume is sent via ``query(payload, model="bart")``. The reply may be a
    dict or a list whose first element is a dict; the summary is read from its
    ``generated_text`` or ``summary_text`` field.

    :param resume_text: Resume text to summarize.
    :return: The stripped summary, or a fallback message when the API call
             fails or the reply has no usable summary.
    """
    # NOTE(review): the "bart" entry in HF_MODELS now points at a
    # keyword-extractor model rather than facebook/bart-large-cnn -- confirm
    # that endpoint actually returns generated_text/summary_text.
    payload = {"inputs": f"Summarize this resume: {resume_text}"}
    response = query(payload, model="bart")

    if not response:
        print("⚠️ Error: API response is None. Returning fallback summary.")
        return "Summary unavailable due to API issues."

    # Some endpoints wrap the result dict in a list. The list cannot be empty
    # here because empty responses were rejected above.
    if isinstance(response, list):
        response = response[0]

    if isinstance(response, dict):
        summary = response.get("generated_text") or response.get("summary_text")
        # Strip before testing so a whitespace-only summary is treated as
        # missing, and ignore non-string payloads instead of crashing on them.
        if isinstance(summary, str):
            summary = summary.strip()
            if summary:
                return summary

    print("⚠️ Unexpected API response format:", response)
    return "Summary unavailable."
 
 
175
 
176
  def store_in_supabase(resume_text, score, candidate_name, email, summary):
177
  """
 
197
  response = supabase.table("candidates").insert(data).execute()
198
  return response
199
 
 
200
def _wrap_text_to_width(text, max_width, font_size, font_name="helv"):
    """Split text into lines no wider than max_width points, keeping blank lines."""

    def measure(fragment):
        return fitz.get_text_length(fragment, fontname=font_name, fontsize=font_size)

    lines = []
    for paragraph in text.split("\n"):
        current = ""
        for word in paragraph.split():
            candidate = f"{current} {word}" if current else word
            if measure(candidate) <= max_width:
                current = candidate
                continue

            if current:
                lines.append(current)

            # A single token (e.g. a long URL) can be wider than the box:
            # hard-break it into the longest prefixes that fit.
            while len(word) > 1 and measure(word) > max_width:
                end = len(word) - 1
                while end > 1 and measure(word[:end]) > max_width:
                    end -= 1
                lines.append(word[:end])
                word = word[end:]
            current = word

        # Also emits "" for an empty paragraph, preserving blank lines.
        lines.append(current)

    return lines


def generate_pdf_report(shortlisted_candidates):
    """Generates a PDF summary of shortlisted candidates with proper text wrapping.

    Every candidate starts on a fresh page. The text is wrapped to the text box
    width and continues on further pages when it is taller than the box.

    :param shortlisted_candidates: Iterable of dicts with "name", "email" and
        "score" keys and an optional "summary" key.
    :return: A BytesIO holding the PDF, positioned at offset 0.
    :raises KeyError: If a candidate lacks "name", "email" or "score".
    """
    # Define text area properties (PDF points)
    text_box_x = 50        # Left margin
    text_box_y = 50        # Top margin
    text_box_width = 500   # Max width before wrapping
    text_box_height = 700  # Max height before continuing on a new page
    font_size = 11         # Font size for better readability
    font_name = "helv"
    line_height = font_size * 1.2

    first_baseline = text_box_y + font_size
    bottom_limit = text_box_y + text_box_height

    doc = fitz.open()
    try:
        for candidate in shortlisted_candidates or []:
            # Use stored summary; fall back when it is missing, None or empty
            summary = candidate.get("summary") or "No summary available"

            # Format candidate details
            candidate_info = (
                f"Candidate: {candidate['name']}\n"
                f"Email: {candidate['email']}\n"
                f"Score: {candidate['score']}\n\n"
                f"Summary:\n{summary}"
            )

            # Lines are placed one by one instead of using insert_textbox,
            # which writes nothing at all when the text overflows its box
            # and so cannot be used to split text across pages.
            page = doc.new_page()
            baseline_y = first_baseline
            for line in _wrap_text_to_width(candidate_info, text_box_width, font_size, font_name):
                if baseline_y > bottom_limit:
                    page = doc.new_page()
                    baseline_y = first_baseline
                if line:
                    page.insert_text(
                        (text_box_x, baseline_y),
                        line,
                        fontsize=font_size,
                        fontname=font_name,
                    )
                baseline_y += line_height

        if len(doc) == 0:
            # A document without pages cannot be saved, so emit a one-page
            # placeholder report instead of failing on an empty shortlist.
            page = doc.new_page()
            page.insert_text(
                (text_box_x, first_baseline),
                "No shortlisted candidates.",
                fontsize=font_size,
                fontname=font_name,
            )

        pdf = BytesIO()
        doc.save(pdf)
    finally:
        # Release the document's native resources even if rendering fails.
        doc.close()

    pdf.seek(0)
    return pdf