Spaces:
Running
Running
Johnny
committed on
Commit
·
4f034fb
1
Parent(s):
8f771eb
updated config and utils to include sentence_transformer for score, re-added gemma for summarization
Browse files
config.py
CHANGED
@@ -3,6 +3,7 @@ from dotenv import load_dotenv
|
|
3 |
from supabase import create_client
|
4 |
import requests
|
5 |
import time
|
|
|
6 |
|
7 |
# Load environment variables from .env file
|
8 |
load_dotenv()
|
@@ -14,33 +15,33 @@ if not SUPABASE_KEY:
|
|
14 |
raise ValueError("SUPABASE_KEY is not set in the environment variables.")
|
15 |
supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
|
16 |
|
17 |
-
#
|
|
|
|
|
18 |
HF_MODELS = {
|
19 |
-
"gemma": "https://
|
20 |
-
"bart": "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
|
21 |
}
|
22 |
|
23 |
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
|
24 |
if not HF_API_TOKEN:
|
25 |
raise ValueError("Missing Hugging Face API key. Check your .env file.")
|
26 |
|
27 |
-
# Correct API Headers
|
28 |
HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}
|
29 |
|
30 |
def query(payload, model="gemma", retries=3, delay=5):
|
31 |
"""
|
32 |
Sends a request to the Hugging Face API with retries.
|
33 |
-
|
34 |
:param payload: The input data for inference.
|
35 |
-
:param model: The model name ('gemma'
|
36 |
:param retries: Number of times to retry if the request fails.
|
37 |
:param delay: Delay in seconds before retrying.
|
38 |
:return: The model's response in JSON format, or None if all retries fail.
|
39 |
"""
|
40 |
if model not in HF_MODELS:
|
41 |
-
raise ValueError("Invalid model name. Choose 'gemma'
|
42 |
|
43 |
-
api_url = HF_MODELS[model]
|
44 |
|
45 |
for attempt in range(retries):
|
46 |
try:
|
@@ -48,20 +49,23 @@ def query(payload, model="gemma", retries=3, delay=5):
|
|
48 |
|
49 |
if response.status_code == 401:
|
50 |
print(f"Error querying Hugging Face model '{model}': 401 Unauthorized. Check API key.")
|
51 |
-
return None
|
|
|
|
|
|
|
|
|
52 |
|
53 |
if response.status_code == 500:
|
54 |
print(f"Server error (500) on attempt {attempt + 1}. Retrying in {delay} seconds...")
|
55 |
-
time.sleep(delay)
|
56 |
-
continue
|
57 |
-
|
58 |
-
response.raise_for_status() # Raise an error for failed requests (except 500)
|
59 |
|
60 |
-
|
|
|
61 |
|
62 |
except requests.exceptions.RequestException as e:
|
63 |
print(f"Error querying Hugging Face model '{model}': {e}")
|
64 |
-
time.sleep(delay)
|
65 |
|
66 |
print("All retry attempts failed.")
|
67 |
-
return None
|
|
|
3 |
from supabase import create_client
|
4 |
import requests
|
5 |
import time
|
6 |
+
from sentence_transformers import SentenceTransformer # Import the transformer model
|
7 |
|
8 |
# Load environment variables from .env file
|
9 |
load_dotenv()
|
|
|
15 |
raise ValueError("SUPABASE_KEY is not set in the environment variables.")
|
16 |
supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
|
17 |
|
18 |
+
# Load Sentence Transformer Model (scoring)
|
19 |
+
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
20 |
+
|
21 |
HF_MODELS = {
|
22 |
+
"gemma": "https://router.huggingface.co/hf-inference/models/Falconsai/text_summarization"
|
|
|
23 |
}
|
24 |
|
25 |
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
|
26 |
if not HF_API_TOKEN:
|
27 |
raise ValueError("Missing Hugging Face API key. Check your .env file.")
|
28 |
|
|
|
29 |
HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}
|
30 |
|
31 |
def query(payload, model="gemma", retries=3, delay=5):
|
32 |
"""
|
33 |
Sends a request to the Hugging Face API with retries.
|
34 |
+
|
35 |
:param payload: The input data for inference.
|
36 |
+
:param model: The model name ('gemma' for summarization).
|
37 |
:param retries: Number of times to retry if the request fails.
|
38 |
:param delay: Delay in seconds before retrying.
|
39 |
:return: The model's response in JSON format, or None if all retries fail.
|
40 |
"""
|
41 |
if model not in HF_MODELS:
|
42 |
+
raise ValueError("Invalid model name. Choose 'gemma' for summarization.")
|
43 |
|
44 |
+
api_url = HF_MODELS[model]
|
45 |
|
46 |
for attempt in range(retries):
|
47 |
try:
|
|
|
49 |
|
50 |
if response.status_code == 401:
|
51 |
print(f"Error querying Hugging Face model '{model}': 401 Unauthorized. Check API key.")
|
52 |
+
return None
|
53 |
+
|
54 |
+
if response.status_code == 402:
|
55 |
+
print(f"Error querying Hugging Face model '{model}': 402 Payment Required. Free tier may not support this model.")
|
56 |
+
return None
|
57 |
|
58 |
if response.status_code == 500:
|
59 |
print(f"Server error (500) on attempt {attempt + 1}. Retrying in {delay} seconds...")
|
60 |
+
time.sleep(delay)
|
61 |
+
continue
|
|
|
|
|
62 |
|
63 |
+
response.raise_for_status()
|
64 |
+
return response.json()
|
65 |
|
66 |
except requests.exceptions.RequestException as e:
|
67 |
print(f"Error querying Hugging Face model '{model}': {e}")
|
68 |
+
time.sleep(delay)
|
69 |
|
70 |
print("All retry attempts failed.")
|
71 |
+
return None
|
utils.py
CHANGED
@@ -4,9 +4,10 @@ import json
|
|
4 |
import re
|
5 |
from io import BytesIO
|
6 |
import supabase
|
7 |
-
from config import SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_HEADERS, supabase, HF_MODELS, query
|
|
|
8 |
|
9 |
-
# These functions will be called in the
|
10 |
|
11 |
def evaluate_resumes(uploaded_files, job_description):
|
12 |
"""Evaluates uploaded resumes and returns shortlisted candidates."""
|
@@ -45,54 +46,54 @@ def extract_email(resume_text):
|
|
45 |
|
46 |
def score_candidate(resume_text, job_description):
|
47 |
"""
|
48 |
-
Scores the candidate's resume based on the job description using
|
49 |
|
50 |
:param resume_text: The extracted resume text.
|
51 |
:param job_description: The job description for comparison.
|
52 |
-
:return: A numerical score (
|
53 |
"""
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
print("API response is None")
|
59 |
-
return 0
|
60 |
|
61 |
-
|
|
|
62 |
|
63 |
-
|
64 |
-
if isinstance(response_gemma, list) and len(response_gemma) > 0:
|
65 |
-
response_gemma = response_gemma[0] # Extract first item if response is a list
|
66 |
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
else:
|
71 |
-
print("Unexpected API response format:", response_gemma) # Debugging
|
72 |
-
return 0 # Default if score is missing
|
73 |
-
except (TypeError, ValueError) as e:
|
74 |
-
print(f"Error parsing score: {e}")
|
75 |
-
return 0
|
76 |
|
77 |
-
# summarize_resume function will use HuggingFace BART model
|
78 |
def summarize_resume(resume_text):
|
79 |
"""
|
80 |
-
Summarizes
|
81 |
|
82 |
-
:param resume_text: The
|
83 |
-
:return: A summarized version of the resume
|
84 |
"""
|
85 |
-
payload = {"inputs": resume_text}
|
86 |
-
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
-
|
89 |
-
|
|
|
90 |
|
91 |
try:
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
|
|
|
|
|
|
|
|
96 |
|
97 |
def store_in_supabase(resume_text, score, candidate_name, email, summary):
|
98 |
"""
|
|
|
4 |
import re
|
5 |
from io import BytesIO
|
6 |
import supabase
|
7 |
+
from config import SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_HEADERS, supabase, HF_MODELS, query, embedding_model
|
8 |
+
from sentence_transformers import SentenceTransformer, util
|
9 |
|
10 |
+
# These functions will be called in the app.py file
|
11 |
|
12 |
def evaluate_resumes(uploaded_files, job_description):
|
13 |
"""Evaluates uploaded resumes and returns shortlisted candidates."""
|
|
|
46 |
|
47 |
def score_candidate(resume_text, job_description):
|
48 |
"""
|
49 |
+
Scores the candidate's resume based on the job description using sentence-transformers.
|
50 |
|
51 |
:param resume_text: The extracted resume text.
|
52 |
:param job_description: The job description for comparison.
|
53 |
+
:return: A numerical score (cosine similarity between 0 and 1).
|
54 |
"""
|
55 |
+
try:
|
56 |
+
# Generate embeddings
|
57 |
+
resume_embedding = embedding_model.encode(resume_text, convert_to_tensor=True)
|
58 |
+
job_embedding = embedding_model.encode(job_description, convert_to_tensor=True)
|
|
|
|
|
59 |
|
60 |
+
# Compute cosine similarity
|
61 |
+
score = util.pytorch_cos_sim(resume_embedding, job_embedding).item()
|
62 |
|
63 |
+
return round(score, 4) # Return similarity score rounded to 4 decimal places
|
|
|
|
|
64 |
|
65 |
+
except Exception as e:
|
66 |
+
print(f"Error computing similarity score: {e}")
|
67 |
+
return 0 # Return 0 if scoring fails
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
|
|
|
69 |
def summarize_resume(resume_text):
|
70 |
"""
|
71 |
+
Summarizes a resume using the Google gemma model.
|
72 |
|
73 |
+
:param resume_text: The resume text to summarize.
|
74 |
+
:return: A summarized version of the resume.
|
75 |
"""
|
76 |
+
payload = {"inputs": f"Summarize this resume: {resume_text}"}
|
77 |
+
|
78 |
+
response = query(payload, model="gemma") # Use gemma for summarization
|
79 |
+
|
80 |
+
if response is None:
|
81 |
+
print("Error: API response is None")
|
82 |
+
return "Summary could not be generated."
|
83 |
|
84 |
+
# If the response is a list, extract the first element
|
85 |
+
if isinstance(response, list) and len(response) > 0:
|
86 |
+
response = response[0]
|
87 |
|
88 |
try:
|
89 |
+
if isinstance(response, dict) and "generated_text" in response:
|
90 |
+
return response["generated_text"]
|
91 |
+
else:
|
92 |
+
print("Unexpected API response format:", response)
|
93 |
+
return "Summary could not be generated."
|
94 |
+
except (TypeError, ValueError) as e:
|
95 |
+
print(f"Error parsing summary: {e}")
|
96 |
+
return "Summary could not be generated."
|
97 |
|
98 |
def store_in_supabase(resume_text, score, candidate_name, email, summary):
|
99 |
"""
|