Spaces:
Running
Running
Johnny
committed on
Commit
·
1131989
1
Parent(s):
8f8f414
updated summarization with validations, update generate_pdf to wrap around text
Browse files
config.py
CHANGED
@@ -19,7 +19,7 @@ supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
|
|
19 |
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
20 |
|
21 |
HF_MODELS = {
|
22 |
-
"bart": "https://router.huggingface.co/hf-inference/models/
|
23 |
}
|
24 |
|
25 |
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
|
@@ -28,44 +28,42 @@ if not HF_API_TOKEN:
|
|
28 |
|
29 |
HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}
|
30 |
|
31 |
-
def query(payload, model="bart", retries=
|
32 |
"""
|
33 |
-
Sends a request to the Hugging Face API with retries.
|
34 |
-
|
35 |
-
:param payload: The input data for inference.
|
36 |
-
:param model: The model name ('bart' for summarization).
|
37 |
-
:param retries: Number of times to retry if the request fails.
|
38 |
-
:param delay: Delay in seconds before retrying.
|
39 |
-
:return: The model's response in JSON format, or None if all retries fail.
|
40 |
"""
|
41 |
if model not in HF_MODELS:
|
42 |
-
raise ValueError("Invalid model name. Choose 'bart' for summarization.")
|
43 |
|
44 |
api_url = HF_MODELS[model]
|
45 |
|
46 |
for attempt in range(retries):
|
47 |
try:
|
48 |
-
response = requests.post(api_url, headers=HF_HEADERS, json=payload)
|
49 |
|
50 |
if response.status_code == 401:
|
51 |
-
print(f"
|
52 |
return None
|
53 |
|
54 |
if response.status_code == 402:
|
55 |
-
print(f"
|
56 |
return None
|
57 |
|
58 |
-
if response.status_code
|
59 |
-
print(f"Server error (
|
60 |
time.sleep(delay)
|
61 |
continue
|
62 |
|
63 |
response.raise_for_status()
|
64 |
return response.json()
|
65 |
|
|
|
|
|
|
|
|
|
66 |
except requests.exceptions.RequestException as e:
|
67 |
-
print(f"
|
68 |
time.sleep(delay)
|
69 |
|
70 |
-
print("All retry attempts failed.")
|
71 |
return None
|
|
|
19 |
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
20 |
|
21 |
HF_MODELS = {
|
22 |
+
"bart": "https://router.huggingface.co/hf-inference/models/transformer3/H2-keywordextractor"
|
23 |
}
|
24 |
|
25 |
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
|
|
|
28 |
|
29 |
HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}
|
30 |
|
31 |
+
def query(payload, model="bart", retries=5, delay=5):
    """
    Send ``payload`` to the Hugging Face inference endpoint and retry transient failures.

    :param payload: JSON-serialisable request body posted to the endpoint.
    :param model: Key into ``HF_MODELS`` selecting the endpoint URL.
    :param retries: Maximum number of attempts before giving up.
    :param delay: Seconds to wait between two attempts.
    :return: The decoded JSON response, or ``None`` when the request is
        rejected (401, 402 or another non-retryable 4xx) or every attempt fails.
    :raises ValueError: If ``model`` is not a key of ``HF_MODELS``.
    """
    if model not in HF_MODELS:
        raise ValueError(f"Invalid model name: {model}. Choose 'bart' for summarization.")

    api_url = HF_MODELS[model]

    for attempt in range(retries):
        try:
            response = requests.post(api_url, headers=HF_HEADERS, json=payload, timeout=10)
        except requests.exceptions.Timeout:
            print(f"⏳ Timeout error on attempt {attempt + 1}. Retrying...")
        except requests.exceptions.RequestException as e:
            print(f"❌ API Request Failed: {e}")
        else:
            status = response.status_code

            if status == 401:
                print("❌ API Key issue: Check HF_API_TOKEN. Unauthorized (401).")
                return None

            if status == 402:
                print("💰 Payment Required (402). Free tier may not support this model.")
                return None

            if status >= 500:
                print(f"⚠️ Server error ({status}) on attempt {attempt + 1}. Retrying in {delay} seconds...")
            elif status in (408, 429):
                # Request timeout / rate limiting: the same request may succeed later.
                print(f"⚠️ Temporary error ({status}) on attempt {attempt + 1}. Retrying in {delay} seconds...")
            elif status >= 400:
                # Any other client error is caused by the request itself, so
                # repeating it unchanged cannot succeed; fail fast instead.
                print(f"❌ API Request Failed: {status} {response.reason}")
                return None
            else:
                try:
                    return response.json()
                except ValueError as e:
                    # Covers both json.JSONDecodeError and the requests-specific
                    # subclass, so a malformed body never escapes as an exception.
                    print(f"❌ API Request Failed: invalid JSON in response: {e}")

        # Waiting after the final attempt would only delay the failure result.
        if attempt < retries - 1:
            time.sleep(delay)

    print("🚨 All retry attempts failed.")
    return None
|
utils.py
CHANGED
@@ -145,30 +145,33 @@ def score_candidate(resume_text, job_description):
|
|
145 |
|
146 |
def summarize_resume(resume_text):
|
147 |
"""
|
148 |
-
Summarizes a resume using the Hugging Face BART model.
|
149 |
"""
|
150 |
payload = {"inputs": f"Summarize this resume: {resume_text}"}
|
151 |
-
response = query(payload, model="bart")
|
152 |
|
153 |
if not response:
|
154 |
-
print("Error: API response is None")
|
155 |
-
return "Summary
|
156 |
|
157 |
try:
|
158 |
-
# Check if response is a list (sometimes HF returns a list with a dict inside)
|
159 |
if isinstance(response, list) and len(response) > 0:
|
160 |
response = response[0]
|
161 |
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
|
169 |
-
|
170 |
-
print(f"Error parsing summary: {e}")
|
171 |
-
return "Summary could not be generated."
|
172 |
|
173 |
def store_in_supabase(resume_text, score, candidate_name, email, summary):
|
174 |
"""
|
@@ -194,26 +197,42 @@ def store_in_supabase(resume_text, score, candidate_name, email, summary):
|
|
194 |
response = supabase.table("candidates").insert(data).execute()
|
195 |
return response
|
196 |
|
197 |
-
# Test with 10 resumes, if they will be shortlisted
|
198 |
def generate_pdf_report(shortlisted_candidates):
|
199 |
-
"""Generates a PDF summary of shortlisted candidates."""
|
200 |
pdf = BytesIO()
|
201 |
doc = fitz.open()
|
202 |
|
203 |
for candidate in shortlisted_candidates:
|
204 |
page = doc.new_page()
|
205 |
|
206 |
-
# Use
|
207 |
summary = candidate.get("summary", "No summary available")
|
208 |
|
209 |
-
|
210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
211 |
f"Candidate: {candidate['name']}\n"
|
212 |
f"Email: {candidate['email']}\n"
|
213 |
-
f"Score: {candidate['score']}\n"
|
214 |
-
f"Summary
|
215 |
)
|
216 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
217 |
doc.save(pdf)
|
218 |
pdf.seek(0)
|
219 |
return pdf
|
|
|
145 |
|
146 |
def summarize_resume(resume_text):
    """
    Ask the "bart" endpoint (via ``query``) for a summary of ``resume_text``.

    Returns the stripped ``generated_text`` or ``summary_text`` field of the
    response. When the API yields nothing usable, a fixed fallback sentence is
    returned instead of raising.
    """
    fallback = "Summary unavailable."

    api_result = query({"inputs": f"Summarize this resume: {resume_text}"}, model="bart")
    if not api_result:
        print("⚠️ Error: API response is None. Returning fallback summary.")
        return "Summary unavailable due to API issues."

    try:
        # The API may wrap the result dict in a list; unwrap the first entry.
        record = api_result
        if isinstance(record, list) and len(record) > 0:
            record = record[0]

        if not isinstance(record, dict):
            return fallback

        text = record.get("generated_text") or record.get("summary_text")
        if not text:
            print("⚠️ Unexpected API response format:", record)
            return fallback

        return text.strip()
    except Exception as e:
        print(f"⚠️ Error parsing summary: {e}")
        return fallback
|
|
|
|
|
175 |
|
176 |
def store_in_supabase(resume_text, score, candidate_name, email, summary):
|
177 |
"""
|
|
|
197 |
response = supabase.table("candidates").insert(data).execute()
|
198 |
return response
|
199 |
|
|
|
200 |
def _write_fitting_text(page, rect, text, fontsize, fontname="helv"):
    """Write as much of ``text`` into ``rect`` as fits; return the unwritten rest."""

    def probe(chunk):
        # A Shape only modifies the page on commit(), so probes that fail or
        # are superseded by a longer chunk leave nothing behind.
        shape = page.new_shape()
        spare = shape.insert_textbox(rect, chunk, fontsize=fontsize, fontname=fontname, align=0)
        # insert_textbox reports overflow with a negative value (nothing stored).
        return shape if spare >= 0 else None

    shape = probe(text)
    if shape is not None:
        shape.commit()
        return ""

    # Prefer breaking at whitespace; fall back to arbitrary character positions
    # only if not even the first whitespace-delimited piece fits.
    whitespace_cuts = [i + 1 for i, ch in enumerate(text) if ch.isspace()]
    char_cuts = range(1, len(text))
    for cuts in (whitespace_cuts, char_cuts):
        best_shape, best_cut = None, 0
        lo, hi = 0, len(cuts) - 1
        # "Prefix fits" is monotonic in prefix length, so binary search finds
        # the longest prefix that still fits.
        while lo <= hi:
            mid = (lo + hi) // 2
            shape = probe(text[:cuts[mid]].rstrip())
            if shape is not None:
                best_shape, best_cut = shape, cuts[mid]
                lo = mid + 1
            else:
                hi = mid - 1
        if best_shape is not None:
            best_shape.commit()
            return text[best_cut:].lstrip()

    raise ValueError("Text box is too small to hold any text at this font size.")


def generate_pdf_report(shortlisted_candidates):
    """
    Generate a PDF summary of shortlisted candidates with proper text wrapping.

    Each candidate starts on a fresh page. Text that does not fit into the
    text box continues on as many follow-up pages as needed.

    :param shortlisted_candidates: Iterable of dicts with the keys ``name``,
        ``email`` and ``score``, and optionally ``summary``.
    :return: A ``BytesIO`` positioned at offset 0 that holds the PDF.
    :raises KeyError: If a candidate lacks ``name``, ``email`` or ``score``.
    """
    # Text area properties (PDF points).
    text_box_x = 50  # Left margin
    text_box_y = 50  # Top margin
    text_box_width = 500  # Max width before wrapping
    text_box_height = 700  # Max height before continuing on a new page
    font_size = 11  # Font size for better readability
    text_rect = fitz.Rect(
        text_box_x,
        text_box_y,
        text_box_x + text_box_width,
        text_box_y + text_box_height,
    )

    pdf = BytesIO()
    doc = fitz.open()
    try:
        for candidate in shortlisted_candidates:
            # Use stored summary, or provide a fallback
            summary = candidate.get("summary", "No summary available")

            remaining = (
                f"Candidate: {candidate['name']}\n"
                f"Email: {candidate['email']}\n"
                f"Score: {candidate['score']}\n\n"
                f"Summary:\n{summary}"
            )

            # Every pass writes a non-empty chunk, so the loop always terminates.
            while remaining:
                page = doc.new_page()
                remaining = _write_fitting_text(page, text_rect, remaining, font_size)

        if len(doc) == 0:
            # PyMuPDF refuses to save a document without pages; emit a
            # placeholder page so an empty shortlist still yields a valid PDF.
            page = doc.new_page()
            page.insert_textbox(
                text_rect,
                "No shortlisted candidates.",
                fontsize=font_size,
                fontname="helv",
                align=0,
            )

        doc.save(pdf)
    finally:
        doc.close()

    pdf.seek(0)
    return pdf
|