import os
import tempfile
from io import BytesIO

import streamlit as st
from PyPDF2 import PdfReader
import spacy
from pyresparser import ResumeParser
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the spaCy model for natural language processing
nlp = spacy.load('en_core_web_sm')
# Helper to extract raw text from PDF resumes (kept for reference; the app
# below delegates parsing to pyresparser)
def extract_text_from_pdf(file):
    text = ""
    pdf_reader = PdfReader(file)
    for page in pdf_reader.pages:
        # extract_text() can return None for image-only pages
        text += page.extract_text() or ""
    return text
# Preprocess text with spaCy: keep alphabetic, non-stop-word tokens, lemmatized
def preprocess_text(text):
    doc = nlp(text)
    tokens = [token.lemma_ for token in doc if not token.is_stop and token.is_alpha]
    return " ".join(tokens)

# Combine the relevant resume fields and preprocess them as one document
def preprocess_resume_data(resume_data):
    # pyresparser returns None for missing fields, so fall back to empty lists
    skills = " ".join(resume_data.get('skills') or [])
    experience = " ".join(resume_data.get('experience') or [])
    degree = " ".join(resume_data.get('degree') or [])
    combined_data = f"{skills} {experience} {degree}"
    return preprocess_text(combined_data)
# Main function to create the Streamlit app
def main():
    st.title("Resume Ranker and Prescreening Software")
    st.write("Upload resumes (in PDF format) and enter a job description or keywords to filter and rank them.")

    # Upload resumes, restricted to PDF files
    uploaded_files = st.file_uploader("Upload Resumes (PDF files)", type="pdf", accept_multiple_files=True)

    # Input field for the job description or keywords
    job_description = st.text_area("Enter Job Description or Keywords")

    if st.button("Rank Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resumes.")
            return
        if not job_description:
            st.warning("Please enter a job description or keywords.")
            return

        # Preprocess the job description
        job_description_processed = preprocess_text(job_description)

        # Fit the vectorizer on the job description; each resume is projected
        # onto the same vocabulary below
        vectorizer = TfidfVectorizer()
        job_vec = vectorizer.fit_transform([job_description_processed])

        # Collect (file name, matching percentage) pairs
        file_responses = []
        # Loop through the uploaded resumes
        for file in uploaded_files:
            # Read the uploaded PDF into memory
            pdf_data = BytesIO(file.read())

            # pyresparser needs a file path, so write the PDF to a temporary file
            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
                temp_file.write(pdf_data.getvalue())
                temp_file_path = temp_file.name

            try:
                # Parse the resume and preprocess the extracted fields
                data = ResumeParser(temp_file_path).get_extracted_data()
                if data:
                    combined_resume_data = preprocess_resume_data(data)
                    resume_vec = vectorizer.transform([combined_resume_data])
                    # TfidfVectorizer L2-normalizes its rows, so this dot
                    # product is the cosine similarity, scaled to a percentage
                    similarity = (resume_vec @ job_vec.T).toarray()[0][0] * 100
                    file_responses.append((file.name, similarity))
            finally:
                # Remove the temporary file (closing it alone would leave it on disk)
                os.remove(temp_file_path)
        # Sort the results by similarity in descending order
        file_responses.sort(key=lambda x: x[1], reverse=True)

        # Display the ranked file names and similarity percentages
        st.header("Ranked Resumes")
        for file_name, similarity in file_responses:
            st.write(f"Resume: {file_name}, Match Percentage: {similarity:.2f}%")

if __name__ == "__main__":
    main()
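
What preprocess_text does, in isolation: it keeps alphabetic, non-stop-word tokens and lemmatizes them. A minimal standalone sketch (the sample sentence is invented, and exact lemmas depend on the en_core_web_sm version):

import spacy

nlp = spacy.load('en_core_web_sm')
doc = nlp("Managed teams of engineers building scalable services")
print([t.lemma_ for t in doc if not t.is_stop and t.is_alpha])
# e.g. ['manage', 'team', 'engineer', 'build', 'scalable', 'service']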
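
Why the dot product in the ranking loop behaves like a match score: TfidfVectorizer L2-normalizes its rows by default (norm='l2'), so the dot product of a resume vector with the job vector equals their cosine similarity. A standalone sketch with invented sample strings, checked against sklearn's cosine_similarity helper:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

vectorizer = TfidfVectorizer()
job_vec = vectorizer.fit_transform(["python machine learning engineer"])
resume_vec = vectorizer.transform(["senior python engineer, machine learning"])

manual = (resume_vec @ job_vec.T).toarray()[0][0]    # the app's formulation
helper = cosine_similarity(resume_vec, job_vec)[0][0]
assert abs(manual - helper) < 1e-9                   # identical by construction
print(f"match: {manual * 100:.2f}%")

Note that because the vectorizer is fitted only on the job description, resume terms outside the job's vocabulary are ignored; the score therefore measures how well a resume covers the job's keywords, not overall textual similarity.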
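
To try the app locally, a sketch assuming the script is saved as app.py: install the dependencies, download the spaCy model, and launch Streamlit. pyresparser also expects NLTK corpora and has historically pinned an older spaCy release, so a dedicated virtual environment is advisable (version conflicts here are a common source of build failures):

pip install streamlit PyPDF2 spacy pyresparser scikit-learn
python -m spacy download en_core_web_sm
python -m nltk.downloader words stopwords
streamlit run app.py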