import spacy
import streamlit as st
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import requests
import re
import fitz  # PyMuPDF
import PyPDF2
import pytesseract
from pdf2image import convert_from_bytes
import docx  # python-docx, for DOCX processing
import os

# Download necessary NLTK data
nltk.download('punkt')
nltk.download('punkt_tab')  # required by newer NLTK releases for word_tokenize
nltk.download('stopwords')

# Download and load the SpaCy model if not already available
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    from spacy.cli import download
    download("en_core_web_sm")  # Download the model
    nlp = spacy.load("en_core_web_sm")  # Load the model after downloading

# Function for PyMuPDF text extraction
def extract_text_with_pymupdf(pdf_file):
    """Extract text using PyMuPDF (fitz)."""
    text = ""
    pdf_document = fitz.open(stream=pdf_file.read(), filetype="pdf")
    for page_num in range(len(pdf_document)):
        page = pdf_document[page_num]
        text += page.get_text()
    pdf_document.close()
    return text

# Function for PyPDF2 text extraction
def extract_text_with_pypdf2(pdf_file):
    """Extract text using PyPDF2."""
    text = ""
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    for page in pdf_reader.pages:
        text += page.extract_text() or ""  # extract_text() can return None
    return text

# Function for OCR extraction (for scanned PDFs)
def extract_text_with_ocr(pdf_file):
    """Extract text using OCR (for scanned PDFs)."""
    text = ""
    # The uploaded file lives in memory, not on disk, so convert its bytes
    # to images with convert_from_bytes rather than convert_from_path.
    images = convert_from_bytes(pdf_file.read())
    for image in images:
        text += pytesseract.image_to_string(image)
    return text

# Function for DOCX text extraction
def extract_text_from_docx(docx_file):
    """Extract text from a DOCX file."""
    doc = docx.Document(docx_file)
    text = '\n'.join([para.text for para in doc.paragraphs])
    return text

# Unified PDF extraction function
def extract_text_from_pdf(pdf_file):
    """Extract text using multiple methods."""
    text = ""

    # Attempt PyMuPDF extraction
    try:
        text = extract_text_with_pymupdf(pdf_file)
        if text.strip():  # If PyMuPDF returns meaningful text
            return text
    except Exception as e:
        print(f"Error with PyMuPDF: {e}")

    # Reset file pointer
    pdf_file.seek(0)

    # Attempt PyPDF2 extraction
    try:
        text = extract_text_with_pypdf2(pdf_file)
        if text.strip():  # If PyPDF2 returns meaningful text
            return text
    except Exception as e:
        print(f"Error with PyPDF2: {e}")

    # Reset file pointer
    pdf_file.seek(0)

    # Attempt OCR as a last resort
    try:
        text = extract_text_with_ocr(pdf_file)
        if text.strip():  # If OCR returns meaningful text
            return text
    except Exception as e:
        print(f"Error with OCR: {e}")

    return text  # Return empty text if all methods fail

# Function to clean and normalize text
def clean_and_normalize_text(text):
    """Clean and normalize the resume/job description text."""
    # Tokenization
    tokens = word_tokenize(text)

    # Lowercasing and removing non-alphabetical tokens
    tokens = [word.lower() for word in tokens if word.isalpha()]

    # Removing stopwords using NLTK
    stop_words = set(stopwords.words("english"))
    filtered_tokens = [word for word in tokens if word not in stop_words]

    # Lemmatization using SpaCy
    doc = nlp(' '.join(filtered_tokens))
    lemmatized_tokens = [token.lemma_ for token in doc]

    # Reconstruct the cleaned text and collapse extra whitespace
    cleaned_text = ' '.join(lemmatized_tokens)
    cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()
    return cleaned_text

# Function for Named Entity Recognition (NER)
def extract_named_entities(text):
    """Extract named entities from text using SpaCy."""
    doc = nlp(text)
    # Extract named entities as (text, label) pairs
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    return entities

# Function to analyze the resume and job description using the Gemini 1.5 Flash model
def analyze_documents(resume_text, job_description):
    """Analyze resume text against the job description using Gemini 1.5 Flash."""
    custom_prompt = f"""
    Please analyze the following resume in the context of the job description provided.
    For the match percentage, please consider:
    - The relevance of the hard skills mentioned.
    - The match of experiences and achievements listed in the resume.
    - Only return a 100% match if all critical skills, experiences, and keywords align well and meaningfully with the job description.

    Job Description: {job_description}

    Resume: {resume_text}
    """

    API_KEY = os.getenv("GEMINI_API_KEY")  # Ensure you set this environment variable securely
    if not API_KEY:
        return {
            "Match Percentage": "API Key Missing",
            "Recommendations": "Please set the GEMINI_API_KEY environment variable.",
        }

    url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key={API_KEY}"
    headers = {'Content-Type': 'application/json'}
    data = {
        "contents": [
            {"role": "user", "parts": [{"text": custom_prompt}]}
        ]
    }

    response = requests.post(url, headers=headers, json=data)

    if response.status_code == 200:
        try:
            result = response.json()
            # Gemini returns generated text under candidates -> content -> parts
            generated_text = (
                result.get('candidates', [{}])[0]
                .get('content', {})
                .get('parts', [{}])[0]
                .get('text', 'N/A')
            )
            # Placeholder parsing: the model's full output is returned as-is;
            # splitting it into a percentage and recommendations is left as a refinement.
            return {
                "Match Percentage": generated_text.strip(),
                "Recommendations": "Placeholder for actual recommendations.",
            }
        except (ValueError, IndexError, AttributeError):
            return {"Match Percentage": "Error", "Recommendations": "Failed to parse response."}
    else:
        return {
            "Match Percentage": "Error",
            "Recommendations": f"API request failed with status code {response.status_code}.",
        }

# Streamlit app configuration
st.set_page_config(page_title="ATS Resume Evaluation System", layout="wide")
# Header Section
st.markdown(
    "<h1 style='text-align: center;'>📝🔍🌟 ATS Resume Evaluation System</h1>",
    unsafe_allow_html=True,
)
st.markdown(
    "<p style='text-align: center;'>Upload your resume and job description for analysis</p>",
    unsafe_allow_html=True,
)
# Inputs: Job description and resume file upload
job_description = st.text_area("Enter the Job Description:", height=250)
resume_file = st.file_uploader("Upload Resume (PDF or DOCX)", type=["pdf", "docx"])

# Process the uploaded resume and job description
if resume_file:
    if job_description:
        try:
            if resume_file.type == "application/pdf":
                resume_text = extract_text_from_pdf(resume_file)
            elif resume_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                resume_text = extract_text_from_docx(resume_file)
            else:
                st.error("Unsupported file type.")
                resume_text = ""

            if resume_text:
                cleaned_resume = clean_and_normalize_text(resume_text)
                cleaned_job_description = clean_and_normalize_text(job_description)

                # Analyze the resume and job description
                result = analyze_documents(cleaned_resume, cleaned_job_description)

                # Display the analysis results
                st.write(f"**Match Percentage**: {result.get('Match Percentage', 'N/A')}")
                st.write(f"**Recommendations**: {result.get('Recommendations', 'N/A')}")
            else:
                st.error("Failed to extract text from the uploaded file.")
        except Exception as e:
            st.error(f"An error occurred during processing: {e}")
    else:
        st.warning("Please enter the job description to begin analysis.")
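
# Usage note (a sketch; the file name app.py is an assumption):
#   streamlit run app.py
# System prerequisites beyond the Python packages imported above:
#   - Tesseract OCR must be installed and on PATH for pytesseract to work.
#   - Poppler must be installed for pdf2image's PDF-to-image conversion.
#   - Set GEMINI_API_KEY in the environment before launching, e.g.:
#       export GEMINI_API_KEY="your-key-here"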