Johnny committed on
Commit
4f034fb
·
1 Parent(s): 8f771eb

updated config and utils to include sentence_transformer for score, re-added gemma for summarization

Browse files
Files changed (2) hide show
  1. config.py +20 -16
  2. utils.py +36 -35
config.py CHANGED
@@ -3,6 +3,7 @@ from dotenv import load_dotenv
3
  from supabase import create_client
4
  import requests
5
  import time
 
6
 
7
  # Load environment variables from .env file
8
  load_dotenv()
@@ -14,33 +15,33 @@ if not SUPABASE_KEY:
14
  raise ValueError("SUPABASE_KEY is not set in the environment variables.")
15
  supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
16
 
17
- # Hugging Face API Config
 
 
18
  HF_MODELS = {
19
- "gemma": "https://api-inference.huggingface.co/models/google/gemma-7b",
20
- "bart": "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
21
  }
22
 
23
  HF_API_TOKEN = os.getenv("HF_API_TOKEN")
24
  if not HF_API_TOKEN:
25
  raise ValueError("Missing Hugging Face API key. Check your .env file.")
26
 
27
- # Correct API Headers
28
  HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}
29
 
30
  def query(payload, model="gemma", retries=3, delay=5):
31
  """
32
  Sends a request to the Hugging Face API with retries.
33
-
34
  :param payload: The input data for inference.
35
- :param model: The model name ('gemma' or 'bart').
36
  :param retries: Number of times to retry if the request fails.
37
  :param delay: Delay in seconds before retrying.
38
  :return: The model's response in JSON format, or None if all retries fail.
39
  """
40
  if model not in HF_MODELS:
41
- raise ValueError("Invalid model name. Choose 'gemma' or 'bart'.")
42
 
43
- api_url = HF_MODELS[model] # Correct model URL
44
 
45
  for attempt in range(retries):
46
  try:
@@ -48,20 +49,23 @@ def query(payload, model="gemma", retries=3, delay=5):
48
 
49
  if response.status_code == 401:
50
  print(f"Error querying Hugging Face model '{model}': 401 Unauthorized. Check API key.")
51
- return None # API key issue
 
 
 
 
52
 
53
  if response.status_code == 500:
54
  print(f"Server error (500) on attempt {attempt + 1}. Retrying in {delay} seconds...")
55
- time.sleep(delay) # Wait before retrying
56
- continue # Retry the request
57
-
58
- response.raise_for_status() # Raise an error for failed requests (except 500)
59
 
60
- return response.json() # Return the parsed JSON response
 
61
 
62
  except requests.exceptions.RequestException as e:
63
  print(f"Error querying Hugging Face model '{model}': {e}")
64
- time.sleep(delay) # Wait before retrying
65
 
66
  print("All retry attempts failed.")
67
- return None # Return None if all retries fail
 
3
  from supabase import create_client
4
  import requests
5
  import time
6
+ from sentence_transformers import SentenceTransformer # Import the transformer model
7
 
8
  # Load environment variables from .env file
9
  load_dotenv()
 
15
  raise ValueError("SUPABASE_KEY is not set in the environment variables.")
16
  supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
17
 
18
+ # Load Sentence Transformer Model (scoring)
19
+ embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
20
+
21
  HF_MODELS = {
22
+ "gemma": "https://router.huggingface.co/hf-inference/models/Falconsai/text_summarization"
 
23
  }
24
 
25
  HF_API_TOKEN = os.getenv("HF_API_TOKEN")
26
  if not HF_API_TOKEN:
27
  raise ValueError("Missing Hugging Face API key. Check your .env file.")
28
 
 
29
  HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}
30
 
31
  def query(payload, model="gemma", retries=3, delay=5):
32
  """
33
  Sends a request to the Hugging Face API with retries.
34
+
35
  :param payload: The input data for inference.
36
+ :param model: The model name ('gemma' for summarization).
37
  :param retries: Number of times to retry if the request fails.
38
  :param delay: Delay in seconds before retrying.
39
  :return: The model's response in JSON format, or None if all retries fail.
40
  """
41
  if model not in HF_MODELS:
42
+ raise ValueError("Invalid model name. Choose 'gemma' for summarization.")
43
 
44
+ api_url = HF_MODELS[model]
45
 
46
  for attempt in range(retries):
47
  try:
 
49
 
50
  if response.status_code == 401:
51
  print(f"Error querying Hugging Face model '{model}': 401 Unauthorized. Check API key.")
52
+ return None
53
+
54
+ if response.status_code == 402:
55
+ print(f"Error querying Hugging Face model '{model}': 402 Payment Required. Free tier may not support this model.")
56
+ return None
57
 
58
  if response.status_code == 500:
59
  print(f"Server error (500) on attempt {attempt + 1}. Retrying in {delay} seconds...")
60
+ time.sleep(delay)
61
+ continue
 
 
62
 
63
+ response.raise_for_status()
64
+ return response.json()
65
 
66
  except requests.exceptions.RequestException as e:
67
  print(f"Error querying Hugging Face model '{model}': {e}")
68
+ time.sleep(delay)
69
 
70
  print("All retry attempts failed.")
71
+ return None
utils.py CHANGED
@@ -4,9 +4,10 @@ import json
4
  import re
5
  from io import BytesIO
6
  import supabase
7
- from config import SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_HEADERS, supabase, HF_MODELS, query
 
8
 
9
- # These functions will be called in the main.py file
10
 
11
  def evaluate_resumes(uploaded_files, job_description):
12
  """Evaluates uploaded resumes and returns shortlisted candidates."""
@@ -45,54 +46,54 @@ def extract_email(resume_text):
45
 
46
  def score_candidate(resume_text, job_description):
47
  """
48
- Scores the candidate's resume based on the job description using the Hugging Face API.
49
 
50
  :param resume_text: The extracted resume text.
51
  :param job_description: The job description for comparison.
52
- :return: A numerical score (default 0 if scoring fails).
53
  """
54
- payload = {"inputs": f"Resume: {resume_text}\nJob Description: {job_description}"}
55
- response_gemma = query(payload, model="gemma") # Call Hugging Face API
56
-
57
- if response_gemma is None:
58
- print("API response is None")
59
- return 0
60
 
61
- print("API Response:", response_gemma) # Debugging
 
62
 
63
- # Handle list response
64
- if isinstance(response_gemma, list) and len(response_gemma) > 0:
65
- response_gemma = response_gemma[0] # Extract first item if response is a list
66
 
67
- try:
68
- if isinstance(response_gemma, dict) and "score" in response_gemma:
69
- return float(response_gemma["score"])
70
- else:
71
- print("Unexpected API response format:", response_gemma) # Debugging
72
- return 0 # Default if score is missing
73
- except (TypeError, ValueError) as e:
74
- print(f"Error parsing score: {e}")
75
- return 0
76
 
77
- # summarize_resume function will use HuggingFace BART model
78
  def summarize_resume(resume_text):
79
  """
80
- Summarizes the resume using Facebook's BART-Large-CNN model.
81
 
82
- :param resume_text: The extracted resume text.
83
- :return: A summarized version of the resume or an error message.
84
  """
85
- payload = {"inputs": resume_text}
86
- response_bart = query(payload, model="bart")
 
 
 
 
 
87
 
88
- if response_bart is None:
89
- return "Summary could not be generated." # Handle API failures gracefully
 
90
 
91
  try:
92
- summary = response_bart[0].get("summary_text", "Summary not available.")
93
- return summary
94
- except (IndexError, KeyError):
95
- return "Summary not available."
 
 
 
 
96
 
97
  def store_in_supabase(resume_text, score, candidate_name, email, summary):
98
  """
 
4
  import re
5
  from io import BytesIO
6
  import supabase
7
+ from config import SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_HEADERS, supabase, HF_MODELS, query, embedding_model
8
+ from sentence_transformers import SentenceTransformer, util
9
 
10
+ # These functions will be called in the app.py file
11
 
12
  def evaluate_resumes(uploaded_files, job_description):
13
  """Evaluates uploaded resumes and returns shortlisted candidates."""
 
46
 
47
def score_candidate(resume_text, job_description):
    """
    Compute a relevance score for a resume against a job description.

    Embeds both texts with the shared SentenceTransformer model and
    measures their cosine similarity.

    :param resume_text: The extracted resume text.
    :param job_description: The job description for comparison.
    :return: Cosine similarity rounded to 4 decimal places, or 0 if
             scoring fails for any reason.
    """
    try:
        # Embed both texts with the module-level model; tensors are
        # required for the cosine-similarity helper below.
        vectors = [
            embedding_model.encode(text, convert_to_tensor=True)
            for text in (resume_text, job_description)
        ]

        # Cosine similarity between the two embeddings as a plain float.
        similarity = util.pytorch_cos_sim(vectors[0], vectors[1]).item()

        return round(similarity, 4)

    except Exception as e:
        # Best-effort scoring: log and fall back to 0 rather than crash.
        print(f"Error computing similarity score: {e}")
        return 0
 
 
 
 
 
 
68
 
 
69
def summarize_resume(resume_text):
    """
    Summarizes a resume via the Hugging Face Inference API.

    Sends the text to the endpoint registered under the 'gemma' key in
    HF_MODELS and extracts the summary from the JSON response.

    :param resume_text: The resume text to summarize.
    :return: The summary string, or "Summary could not be generated."
             if the API call fails or the response is malformed.
    """
    payload = {"inputs": f"Summarize this resume: {resume_text}"}

    response = query(payload, model="gemma")  # Use gemma for summarization

    if response is None:
        print("Error: API response is None")
        return "Summary could not be generated."

    # If the response is a list, extract the first element
    if isinstance(response, list) and len(response) > 0:
        response = response[0]

    try:
        if isinstance(response, dict):
            # BUGFIX: summarization endpoints (e.g. the configured
            # Falconsai/text_summarization model) return 'summary_text',
            # while text-generation endpoints return 'generated_text'.
            # The previous code only checked 'generated_text', so valid
            # summarization responses were rejected — accept either key.
            for key in ("generated_text", "summary_text"):
                if key in response:
                    return response[key]
        print("Unexpected API response format:", response)
        return "Summary could not be generated."
    except (TypeError, ValueError) as e:
        print(f"Error parsing summary: {e}")
        return "Summary could not be generated."
97
 
98
  def store_in_supabase(resume_text, score, candidate_name, email, summary):
99
  """