Spaces:

gauravbox
/

TalentLensAI

Running

App Files Files Community

Johnny committed on Apr 3

Commit

1131989

1 Parent(s): 8f8f414

updated summarization with validations, update generate_pdf to wrap around text

Browse files

Files changed (2) hide show

config.py +15 -17
utils.py +40 -21

config.py CHANGED Viewed

@@ -19,7 +19,7 @@ supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
 embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
 HF_MODELS = {
-    "bart": "https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn"
 }
 HF_API_TOKEN = os.getenv("HF_API_TOKEN")
@@ -28,44 +28,42 @@ if not HF_API_TOKEN:
 HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}
-def query(payload, model="bart", retries=3, delay=5):
     """
-    Sends a request to the Hugging Face API with retries.
-    :param payload: The input data for inference.
-    :param model: The model name ('bart' for summarization).
-    :param retries: Number of times to retry if the request fails.
-    :param delay: Delay in seconds before retrying.
-    :return: The model's response in JSON format, or None if all retries fail.
     """
     if model not in HF_MODELS:
-        raise ValueError("Invalid model name. Choose 'bart' for summarization.")
     api_url = HF_MODELS[model]
     for attempt in range(retries):
         try:
-            response = requests.post(api_url, headers=HF_HEADERS, json=payload)
             if response.status_code == 401:
-                print(f"Error querying Hugging Face model '{model}': 401 Unauthorized. Check API key.")
                 return None
             if response.status_code == 402:
-                print(f"Error querying Hugging Face model '{model}': 402 Payment Required. Free tier may not support this model.")
                 return None
-            if response.status_code == 500:
-                print(f"Server error (500) on attempt {attempt + 1}. Retrying in {delay} seconds...")
                 time.sleep(delay)
                 continue
             response.raise_for_status()
             return response.json()
         except requests.exceptions.RequestException as e:
-            print(f"Error querying Hugging Face model '{model}': {e}")
             time.sleep(delay)
-    print("All retry attempts failed.")
     return None

 # Sentence-embedding model, loaded once at import time.
 embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
 # Maps the short model name accepted by query() to its inference endpoint URL.
 # NOTE(review): the key is still "bart", but the URL points at a repo named
 # "H2-keywordextractor" rather than a BART checkpoint — confirm the swap is
 # intended and that callers expecting summaries still get usable output.
 HF_MODELS = {
+    "bart": "https://router.huggingface.co/hf-inference/models/transformer3/H2-keywordextractor"
 }
 # API token read from the environment; os.getenv returns None when it is unset.
 HF_API_TOKEN = os.getenv("HF_API_TOKEN")
 # Bearer-token header sent with every inference request.
 HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}
def query(payload, model="bart", retries=5, delay=5):
    """
    Send an inference request to the Hugging Face API, retrying transient failures.

    :param payload: JSON-serialisable body posted to the model endpoint.
    :param model: Key into ``HF_MODELS`` selecting the endpoint URL.
    :param retries: Maximum number of attempts before giving up.
    :param delay: Seconds to wait between consecutive attempts.
    :return: The decoded JSON response, or ``None`` on a 401/402 status or
        when every attempt fails.
    :raises ValueError: If ``model`` is not a key of ``HF_MODELS``.
    """
    if model not in HF_MODELS:
        raise ValueError(f"Invalid model name: {model}. Choose 'bart' for summarization.")

    api_url = HF_MODELS[model]

    for attempt in range(retries):
        try:
            response = requests.post(api_url, headers=HF_HEADERS, json=payload, timeout=10)

            # Credential and billing problems will not fix themselves: fail fast.
            if response.status_code == 401:
                print("❌ API Key issue: Check HF_API_TOKEN. Unauthorized (401).")
                return None
            if response.status_code == 402:
                print("💰 Payment Required (402). Free tier may not support this model.")
                return None

            if response.status_code in (500, 503):
                # Transient server-side failure: fall through to the retry wait.
                print(f"⚠️ Server error ({response.status_code}) on attempt {attempt + 1}. Retrying in {delay} seconds...")
            else:
                response.raise_for_status()
                return response.json()
        except requests.exceptions.Timeout:
            print(f"⏳ Timeout error on attempt {attempt + 1}. Retrying...")
        except requests.exceptions.RequestException as e:
            print(f"❌ API Request Failed: {e}")

        # Wait only when another attempt follows; sleeping after the final
        # failure would just delay the caller's fallback path.
        if attempt < retries - 1:
            time.sleep(delay)

    print("🚨 All retry attempts failed.")
    return None

utils.py CHANGED Viewed

@@ -145,30 +145,33 @@ def score_candidate(resume_text, job_description):
 def summarize_resume(resume_text):
     """
-    Summarizes a resume using the Hugging Face BART model.
     """
     payload = {"inputs": f"Summarize this resume: {resume_text}"}
-    response = query(payload, model="bart")  # Call API
     if not response:
-        print("Error: API response is None")
-        return "Summary could not be generated."
     try:
-        # Check if response is a list (sometimes HF returns a list with a dict inside)
         if isinstance(response, list) and len(response) > 0:
             response = response[0]
-        # Adjust for different response formats
-        if isinstance(response, dict) and ("generated_text" in response or "summary_text" in response):
-            return response.get("generated_text", response.get("summary_text", "Summary not available"))
-        else:
-            print("Unexpected API response format:", response)
-            return "Summary could not be generated."
-    except (TypeError, ValueError) as e:
-        print(f"Error parsing summary: {e}")
-        return "Summary could not be generated."
 def store_in_supabase(resume_text, score, candidate_name, email, summary):
     """
@@ -194,26 +197,42 @@ def store_in_supabase(resume_text, score, candidate_name, email, summary):
     response = supabase.table("candidates").insert(data).execute()
     return response
-# Test with 10 resumes, if they will be shortlisted
 def generate_pdf_report(shortlisted_candidates):
-    """Generates a PDF summary of shortlisted candidates."""
     pdf = BytesIO()
     doc = fitz.open()
     for candidate in shortlisted_candidates:
         page = doc.new_page()
-        # Use the stored summary, or provide a fallback
         summary = candidate.get("summary", "No summary available")
-        page.insert_text(
-            (50, 50),
             f"Candidate: {candidate['name']}\n"
             f"Email: {candidate['email']}\n"
-            f"Score: {candidate['score']}\n"
-            f"Summary: {summary}"
         )
     doc.save(pdf)
     pdf.seek(0)
     return pdf

 def summarize_resume(resume_text):
     """
     Ask the model registered under the "bart" key for a summary of a resume.

     The resume text is embedded in a "Summarize this resume: ..." prompt and
     sent through query(). Returns the stripped summary string, or a fixed
     fallback message when the API gives no usable answer.
     """
     api_result = query({"inputs": f"Summarize this resume: {resume_text}"}, model="bart")

     # query() yields None (or another falsy value) when the request failed.
     if not api_result:
         print("⚠️ Error: API response is None. Returning fallback summary.")
         return "Summary unavailable due to API issues."

     try:
         # Unwrap a list response and inspect its first element.
         if isinstance(api_result, list) and len(api_result) > 0:
             api_result = api_result[0]

         # Anything that is not a dict carries no recognised summary field.
         if not isinstance(api_result, dict):
             return "Summary unavailable."

         text = api_result.get("generated_text") or api_result.get("summary_text")
         if not text:
             print("⚠️ Unexpected API response format:", api_result)
             return "Summary unavailable."
         return text.strip()
     except Exception as e:
         print(f"⚠️ Error parsing summary: {e}")
         return "Summary unavailable."
 def store_in_supabase(resume_text, score, candidate_name, email, summary):
     """
     response = supabase.table("candidates").insert(data).execute()
     return response
 def _wrap_text_to_width(text, max_width, fontname, fontsize):
     """Split *text* into lines no wider than *max_width* points, breaking at spaces."""
     wrapped = []
     for paragraph in text.split("\n"):
         current = ""
         for word in paragraph.split(" "):
             trial = f"{current} {word}" if current else word
             if current and fitz.get_text_length(trial, fontname=fontname, fontsize=fontsize) > max_width:
                 wrapped.append(current)
                 current = word
             else:
                 current = trial
         # Always emit the tail, so blank paragraphs survive as blank lines.
         wrapped.append(current)
     return wrapped


 def generate_pdf_report(shortlisted_candidates):
     """
     Generate a PDF summary of shortlisted candidates with text wrapping.

     Each candidate starts on a fresh page; text that does not fit in the
     text box continues on as many follow-up pages as needed.

     :param shortlisted_candidates: Iterable of dicts with "name", "email" and
         "score" keys and an optional "summary" key.
     :return: A BytesIO holding the PDF, positioned at offset 0.
     :raises KeyError: If a candidate lacks "name", "email" or "score".
     """
     # Text area properties
     text_box_x = 50  # Left margin
     text_box_y = 50  # Top margin
     text_box_width = 500  # Max width before wrapping
     text_box_height = 700  # Max height before splitting to a new page
     font_size = 11  # Font size for better readability
     font_name = "helv"
     text_rect = fitz.Rect(text_box_x, text_box_y, text_box_x + text_box_width, text_box_y + text_box_height)
     # Optimistic per-page line count (assumes line height >= font size); the
     # fitting loop below shrinks it until the text really fits.
     max_lines_per_page = max(1, int(text_box_height // font_size))

     pdf = BytesIO()
     doc = fitz.open()
     for candidate in shortlisted_candidates:
         # Use stored summary; fall back when it is missing or empty/None.
         summary = candidate.get("summary") or "No summary available"
         candidate_info = (
             f"Candidate: {candidate['name']}\n"
             f"Email: {candidate['email']}\n"
             f"Score: {candidate['score']}\n\n"
             f"Summary:\n{summary}"
         )
         remaining = _wrap_text_to_width(candidate_info, text_box_width, font_name, font_size)
         while remaining:
             page = doc.new_page()
             fitted = min(len(remaining), max_lines_per_page)
             # insert_textbox returns a negative value and writes nothing when
             # the text does not fit, so try fewer lines until it succeeds.
             # The first success is the only write made to this page.
             while fitted > 0:
                 leftover = page.insert_textbox(
                     text_rect,
                     "\n".join(remaining[:fitted]),
                     fontsize=font_size,
                     fontname=font_name,
                     align=0,
                 )
                 if leftover >= 0:
                     break
                 fitted -= 1
             # Always consume at least one line so the loop terminates, even
             # in the unexpected case where a single line cannot be placed.
             remaining = remaining[max(fitted, 1):]

     # NOTE(review): PyMuPDF is expected to refuse saving a zero-page document;
     # emit a placeholder page so an empty shortlist still yields a valid PDF.
     if len(doc) == 0:
         doc.new_page().insert_textbox(
             text_rect,
             "No shortlisted candidates.",
             fontsize=font_size,
             fontname=font_name,
             align=0,
         )

     doc.save(pdf)
     doc.close()
     pdf.seek(0)
     return pdf