Johnny committed
Commit edfcf73
Parent(s): 19ea0c5

added config.toml, updated requirements.txt, UI update

Files changed:
- .streamlit/config.toml +6 -0
- config.py +67 -10
- main.py +15 -6
- requirements.txt +214 -10
- utils.py +86 -42
.streamlit/config.toml
ADDED
@@ -0,0 +1,6 @@
+[theme]
+primaryColor="#F63366"
+backgroundColor="#FFFFFF"
+secondaryBackgroundColor="#F0F2F6"
+textColor="#262730"
+font="sans serif"
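
The six added lines define the app's Streamlit theme; Streamlit reads .streamlit/config.toml automatically at startup. As a small sanity check (not part of this commit, and assuming a Streamlit version that exposes theme settings through st.get_option), the values can be read back from inside the app:

import streamlit as st

# Read back the theme values supplied by .streamlit/config.toml
primary = st.get_option("theme.primaryColor")        # expected "#F63366"
background = st.get_option("theme.backgroundColor")  # expected "#FFFFFF"
st.caption(f"Theme check: primary {primary}, background {background}")
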
config.py
CHANGED
@@ -1,6 +1,7 @@
 import os
 from dotenv import load_dotenv
 from supabase import create_client
+import requests
 
 # Load environment variables from .env file
 load_dotenv()
@@ -12,18 +13,74 @@ if not SUPABASE_KEY:
     raise ValueError("SUPABASE_KEY is not set in the environment variables.")
 supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
 
+HF_MODELS = {
+    "gemma": "https://api-inference.huggingface.co/models/google/gemma-7b",
+    "bart": "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
+}
+
 # Hugging Face API Config
-HF_API_URL = "https://router.huggingface.co/hf-inference/models/google/gemma-7b"
+#HF_API_URL = "https://router.huggingface.co/hf-inference/models/google/gemma-7b"
 HF_API_TOKEN = os.getenv("HF_API_TOKEN")
 HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}
 
-
-
-
-
-
-
-
-
+# Ensure the API key is loaded
+if not HF_API_TOKEN:
+    raise ValueError("Missing Hugging Face API key. Check your .env file.")
+
+#
+def query(payload, model="gemma"):
+    """
+    Sends a request to the selected Hugging Face model API.
+
+    :param payload: The input data for inference.
+    :param model: Choose either 'gemma' (for google/gemma-7b) or 'bart' (for facebook/bart-large-cnn).
+    :return: The model's response in JSON format, or None if the request fails.
+    """
+    if model not in HF_MODELS:
+        raise ValueError("Invalid model name. Choose 'gemma' or 'bart'.")
+
+    api_url = HF_MODELS[model]  # HF_MODELS already stores the full endpoint URLs
+
+    try:
+        response = requests.post(api_url, headers=HF_HEADERS, json=payload)
+
+        if response.status_code == 401:
+            print(f"Error querying Hugging Face model '{model}': 401 Unauthorized. Check API key.")
+            return None  # Handle authentication failure
+
+        response.raise_for_status()  # Raise an error for failed requests (e.g., 500 errors)
+
+        return response.json()  # Return the parsed JSON response
+
+    except requests.exceptions.RequestException as e:
+        print(f"Error querying Hugging Face model '{model}': {e}")
+        return None  # Return None if API call fails
+
+# Bart query
+def query(payload, model="bart"):
+    """
+    Sends a request to the selected Hugging Face model API.
+
+    :param payload: The input data for inference.
+    :param model: Choose either 'gemma' (for google/gemma-7b) or 'bart' (for facebook/bart-large-cnn).
+    :return: The model's response in JSON format, or None if the request fails.
+    """
+    if model not in HF_MODELS:
+        raise ValueError("Invalid model name. Choose 'gemma' or 'bart'.")
+
+    api_url = HF_MODELS[model]  # HF_MODELS already stores the full endpoint URLs
+
+    try:
+        response = requests.post(api_url, headers=HF_HEADERS, json=payload)
+
+        if response.status_code == 401:
+            print(f"Error querying Hugging Face model '{model}': 401 Unauthorized. Check API key.")
+            return None  # Handle authentication failure
+
+        response.raise_for_status()  # Raise an error for failed requests (e.g., 500 errors)
+
+        return response.json()  # Return the parsed JSON response
 
-
+    except requests.exceptions.RequestException as e:
+        print(f"Error querying Hugging Face model '{model}': {e}")
+        return None  # Return None if API call fails
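
config.py now defines query() twice: the first version defaults to model="gemma" and the second, under the "# Bart query" comment, defaults to model="bart". Because the bodies are otherwise identical, the second definition is the one left bound after import; callers that pass model= explicitly (as utils.py does) behave the same either way. A minimal usage sketch (not part of the commit), assuming HF_API_TOKEN is set in .env and that both inference endpoints accept these payloads:

from config import query

# Text generation against the gemma endpoint
generation = query({"inputs": "Write one sentence about resume screening."}, model="gemma")
print(generation)

# Summarization against the bart endpoint
summary = query({"inputs": "Senior Python developer with Django and AWS experience."}, model="bart")
print(summary)

# Each call returns parsed JSON on success, or None if the request failed.
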
main.py
CHANGED
@@ -1,7 +1,7 @@
 import streamlit as st
-from utils import
+from utils import evaluate_resumes, generate_pdf_report, store_in_supabase, score_candidate, extract_email, parse_resume
 from config import supabase
-from config import HF_API_TOKEN,
+from config import HF_API_TOKEN, HF_HEADERS, HF_MODELS
 import fitz  # PyMuPDF
 from io import BytesIO
 from dotenv import load_dotenv
@@ -9,12 +9,21 @@ import os
 import requests
 
 def main():
-    st.
+    st.set_page_config(page_title="TalentLens.AI", layout="centered")
+    st.markdown(
+        "<h1 style='text-align: center;'>TalentLens.AI</h1>",
+        unsafe_allow_html=True
+    )
+    st.divider()
+    st.markdown(
+        "<h3 style='text-align: center;'>AI-Powered Intelligent Resume Screening</h3>",
+        unsafe_allow_html=True
+    )
+    uploaded_files = st.file_uploader("Upload Resumes (PDF Only)", accept_multiple_files=True, type=["pdf"])
     job_description = st.text_area("Enter Job Description")
-    uploaded_files = st.file_uploader("Upload Resumes (PDF)", accept_multiple_files=True, type=["pdf"])
 
-    if st.button("
-        shortlisted =
+    if st.button("Evaluate Resumes"):
+        shortlisted = evaluate_resumes(uploaded_files, job_description)
         for candidate in shortlisted:
             st.write(f"**{candidate['name']}** - Score: {candidate['score']}")
 
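
The reworked main() renders a centered title and subtitle, collects the uploaded PDFs and the job description, and lists the shortlisted candidates once the button is pressed. One possible follow-on, sketched here as a hypothetical helper that is not part of this commit, is to offer the PDF built by utils.generate_pdf_report as a download from the same page; it assumes shortlisted holds the candidate dicts returned by evaluate_resumes:

import streamlit as st
from utils import generate_pdf_report

def offer_report_download(shortlisted):
    # generate_pdf_report returns a BytesIO buffer rewound to the start
    pdf_buffer = generate_pdf_report(shortlisted)
    st.download_button(
        label="Download shortlist report (PDF)",
        data=pdf_buffer,
        file_name="shortlisted_candidates.pdf",
        mime="application/pdf",
    )
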
requirements.txt
CHANGED
@@ -1,10 +1,214 @@
-
-
-
-
-
-
-
-
-
-
+aiohappyeyeballs==2.6.1
+aiohttp==3.11.13
+aiosignal==1.3.2
+altair==5.5.0
+annotated-types==0.7.0
+anyio==4.8.0
+appdirs==1.4.4
+asgiref==3.8.1
+asttokens==3.0.0
+attrs==25.2.0
+auth0-python==4.8.1
+backoff==2.2.1
+bcrypt==4.3.0
+blinker==1.9.0
+blis==1.2.0
+build==1.2.2.post1
+cachetools==5.5.2
+catalogue==2.0.10
+certifi==2025.1.31
+cffi==1.17.1
+charset-normalizer==3.4.1
+chroma-hnswlib==0.7.6
+chromadb==0.6.3
+click==8.1.8
+cloudpathlib==0.21.0
+coloredlogs==15.0.1
+confection==0.1.5
+crewai==0.105.0
+cryptography==44.0.2
+cymem==2.0.11
+decorator==5.2.1
+Deprecated==1.2.18
+deprecation==2.1.0
+distro==1.9.0
+docstring_parser==0.16
+docx2txt==0.8
+durationpy==0.9
+et_xmlfile==2.0.0
+executing==2.2.0
+fastapi==0.115.11
+filelock==3.17.0
+flatbuffers==25.2.10
+frozenlist==1.5.0
+fsspec==2025.3.0
+gitdb==4.0.12
+GitPython==3.1.44
+google-auth==2.38.0
+googleapis-common-protos==1.69.1
+gotrue==2.11.4
+greenlet==3.1.1
+grpcio==1.71.0
+h11==0.14.0
+h2==4.2.0
+hpack==4.1.0
+httpcore==1.0.7
+httptools==0.6.4
+httpx==0.27.2
+huggingface-hub==0.29.3
+humanfriendly==10.0
+hyperframe==6.1.0
+idna==3.10
+importlib_metadata==8.6.1
+importlib_resources==6.5.2
+iniconfig==2.1.0
+instructor==1.7.4
+ipython==9.0.2
+ipython_pygments_lexers==1.1.1
+jedi==0.19.2
+Jinja2==3.1.6
+jiter==0.8.2
+joblib==1.4.2
+json5==0.10.0
+json_repair==0.39.1
+jsonpatch==1.33
+jsonpickle==4.0.2
+jsonpointer==3.0.0
+jsonref==1.1.0
+jsonschema==4.23.0
+jsonschema-specifications==2024.10.1
+kubernetes==32.0.1
+langchain==0.3.20
+langchain-core==0.3.45
+langchain-text-splitters==0.3.6
+langcodes==3.5.0
+langsmith==0.3.15
+language_data==1.3.0
+litellm==1.60.2
+marisa-trie==1.2.1
+markdown-it-py==3.0.0
+MarkupSafe==3.0.2
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+mmh3==5.1.0
+monotonic==1.6
+mpmath==1.3.0
+multidict==6.1.0
+murmurhash==1.0.12
+narwhals==1.30.0
+networkx==3.4.2
+nltk==3.9.1
+numpy==2.2.3
+oauthlib==3.2.2
+onnxruntime==1.16.3
+openai==1.66.3
+openpyxl==3.1.5
+opentelemetry-api==1.31.0
+opentelemetry-exporter-otlp-proto-common==1.31.0
+opentelemetry-exporter-otlp-proto-grpc==1.31.0
+opentelemetry-exporter-otlp-proto-http==1.31.0
+opentelemetry-instrumentation==0.52b0
+opentelemetry-instrumentation-asgi==0.52b0
+opentelemetry-instrumentation-fastapi==0.52b0
+opentelemetry-proto==1.31.0
+opentelemetry-sdk==1.31.0
+opentelemetry-semantic-conventions==0.52b0
+opentelemetry-util-http==0.52b0
+orjson==3.10.15
+overrides==7.7.0
+packaging==24.2
+pandas==2.2.3
+parso==0.8.4
+pdfminer.six==20231228
+pdfplumber==0.11.5
+pexpect==4.9.0
+phonenumbers==9.0.1
+pillow==11.1.0
+pluggy==1.5.0
+postgrest==0.19.3
+posthog==3.19.1
+preshed==3.0.9
+prompt_toolkit==3.0.50
+propcache==0.3.0
+protobuf==5.29.3
+psycopg2==2.9.10
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyarrow==14.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.1
+pycparser==2.22
+pydantic==2.10.6
+pydantic_core==2.27.2
+pydeck==0.9.1
+Pygments==2.19.1
+PyJWT==2.10.1
+PyMuPDF==1.25.4
+PyPDF2==3.0.1
+pypdfium2==4.30.1
+PyPika==0.48.9
+pyproject_hooks==1.2.0
+pytest==8.3.5
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+pytz==2025.1
+pyvis==0.3.2
+PyYAML==6.0.2
+realtime==2.4.1
+referencing==0.36.2
+regex==2024.11.6
+requests==2.32.3
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+resume-parser==0.8.4
+rich==13.9.4
+rpds-py==0.23.1
+rsa==4.9
+shellingham==1.5.4
+six==1.17.0
+smart-open==7.1.0
+smmap==5.0.2
+sniffio==1.3.1
+spacy==3.8.4
+spacy-legacy==3.0.12
+spacy-loggers==1.0.5
+SQLAlchemy==2.0.39
+srsly==2.5.1
+stack-data==0.6.3
+starlette==0.46.1
+stemming==1.0.1
+storage3==0.11.3
+streamlit==1.43.2
+StrEnum==0.4.15
+supabase==2.13.0
+supafunc==0.9.3
+sympy==1.13.3
+tenacity==9.0.0
+thinc==8.3.4
+tika==2.6.0
+tiktoken==0.9.0
+tokenizers==0.21.0
+toml==0.10.2
+tomli==2.2.1
+tomli_w==1.2.0
+tornado==6.4.2
+tqdm==4.67.1
+traitlets==5.14.3
+typer==0.15.2
+typing_extensions==4.12.2
+tzdata==2025.1
+urllib3==2.3.0
+uv==0.6.6
+uvicorn==0.34.0
+uvloop==0.21.0
+wasabi==1.1.3
+watchdog==6.0.0
+watchfiles==1.0.4
+wcwidth==0.2.13
+weasel==0.4.1
+websocket-client==1.8.0
+websockets==14.2
+wrapt==1.17.2
+yarl==1.18.3
+zipp==3.21.0
+zstandard==0.23.0
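
The dependency list is now fully pinned. A small spot check (not part of the commit) that an environment matches a few of the pins, using only the standard library:

from importlib.metadata import version

pins = {"streamlit": "1.43.2", "supabase": "2.13.0", "PyMuPDF": "1.25.4", "requests": "2.32.3"}
for package, pinned in pins.items():
    installed = version(package)
    status = "OK" if installed == pinned else f"differs from pinned {pinned}"
    print(f"{package}: {installed} ({status})")
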
utils.py
CHANGED
@@ -4,11 +4,34 @@ import json
 import re
 from io import BytesIO
 import supabase
-from config import SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN,
-#from config import supabase
+from config import SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_HEADERS, supabase, HF_MODELS, query
 
 # These functions will be called in the main.py file
 
+def evaluate_resumes(uploaded_files, job_description):
+    """Evaluates uploaded resumes and returns shortlisted candidates."""
+    candidates = []
+    for pdf_file in uploaded_files:
+        resume_text = parse_resume(pdf_file)
+        score = score_candidate(resume_text, job_description)
+        email = extract_email(resume_text)
+
+        # Generate a summary of the resume
+        summary = summarize_resume(resume_text)
+
+        candidates.append({
+            "name": pdf_file.name,
+            "resume": resume_text,
+            "score": score,
+            "email": email,
+            "summary": summary
+        })
+
+        # Store all details including summary in Supabase
+        store_in_supabase(resume_text, score, pdf_file.name, email, summary)
+
+    return sorted(candidates, key=lambda x: x["score"], reverse=True)[:5]  # Return top 5 candidates
+
 def parse_resume(pdf_file):
     """Extracts text from a resume PDF."""
     doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
@@ -20,70 +43,91 @@ def extract_email(resume_text):
     match = re.search(r"[\w\.-]+@[\w\.-]+", resume_text)
     return match.group(0) if match else None
 
+# Test on why score 0 is returned even though resume matches key words
+# score_candidate function will use HuggingFace gemini model
 def score_candidate(resume_text, job_description):
-    """
+    """
+    Scores the candidate's resume based on the job description using the Hugging Face API.
+
+    :param resume_text: The extracted resume text.
+    :param job_description: The job description for comparison.
+    :return: A numerical score (default 0 if scoring fails).
+    """
     payload = {"inputs": f"Resume: {resume_text}\nJob Description: {job_description}"}
-
+    response_gemma = query(payload, model="gemma")  # Use Google Gemma Model for scoring
+
+    if response_gemma is None:
+        return 0  # Return 0 if API call fails
+
+    try:
+        return float(response_gemma.get("score", 0))  # Ensure score is always a float
+    except (TypeError, ValueError):
+        return 0  # Return 0 if score parsing fails
+
+# summarize_resume function will use HuggingFace BART model
+def summarize_resume(resume_text):
+    """
+    Summarizes the resume using Facebook's BART-Large-CNN model.
+
+    :param resume_text: The extracted resume text.
+    :return: A summarized version of the resume or an error message.
+    """
+    payload = {"inputs": resume_text}
+    response_bart = query(payload, model="bart")
 
-
-
-        print(f"Error: {response.status_code}, {response.text}")  # Log any errors
-        return 0  # Return default score if API fails
+    if response_bart is None:
+        return "Summary could not be generated."  # Handle API failures gracefully
 
     try:
-
-
-
-        return
+        summary = response_bart[0].get("summary_text", "Summary not available.")
+        return summary
+    except (IndexError, KeyError):
+        return "Summary not available."
 
 def store_in_supabase(resume_text, score, candidate_name, email, summary):
-    """
+    """
+    Stores resume data in Supabase.
+
+    :param resume_text: The extracted resume text.
+    :param score: The candidate's score (must be a valid number).
+    :param candidate_name: The candidate's name.
+    :param email: Candidate's email address.
+    :param summary: A summarized version of the resume.
+    """
+    if score is None:
+        score = 0  # Ensure score is never NULL
+
     data = {
         "name": candidate_name,
         "resume": resume_text,
         "score": score,
         "email": email,
-        "summary": summary
+        "summary": summary
     }
 
     response = supabase.table("candidates").insert(data).execute()
-
+    return response
 
+# Test with 10 resumes, if they will be shortlisted
 def generate_pdf_report(shortlisted_candidates):
     """Generates a PDF summary of shortlisted candidates."""
    pdf = BytesIO()
     doc = fitz.open()
+
     for candidate in shortlisted_candidates:
         page = doc.new_page()
-
+
+        # Use the stored summary, or provide a fallback
+        summary = candidate.get("summary", "No summary available")
+
         page.insert_text(
             (50, 50),
-            f"Candidate: {candidate['name']}\
+            f"Candidate: {candidate['name']}\n"
+            f"Email: {candidate['email']}\n"
+            f"Score: {candidate['score']}\n"
+            f"Summary: {summary}"
         )
+
     doc.save(pdf)
     pdf.seek(0)
-    return pdf
-
-def process_resumes(uploaded_files, job_description):
-    """Processes uploaded resumes and returns shortlisted candidates."""
-    candidates = []
-    for pdf_file in uploaded_files:
-        resume_text = parse_resume(pdf_file)
-        score = score_candidate(resume_text, job_description)
-        email = extract_email(resume_text)
-
-        # Generate summary (replace with actual summarization logic later)
-        summary = f"{pdf_file.name} has a score of {score} for this job."
-
-        candidates.append({
-            "name": pdf_file.name,
-            "resume": resume_text,
-            "score": score,
-            "email": email,
-            "summary": summary
-        })
-
-        # Store all details including summary in Supabase
-        store_in_supabase(resume_text, score, pdf_file.name, email, summary)
-
-    return sorted(candidates, key=lambda x: x["score"], reverse=True)[:5]  # Return top 5 candidates
+    return pdf