Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import os | |
| import fitz # PyMuPDF | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from spellchecker import SpellChecker # Import the SpellChecker class from pyspellchecker | |
| import tempfile | |
| import pandas as pd | |
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in the PDF at *pdf_path*.

    The document is opened with PyMuPDF (``fitz``) and closed again when the
    ``with`` block exits.
    """
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)


def _parse_skills(raw_skills):
    """Split a comma-separated string into stripped, non-empty skill names.

    Blank entries (empty input, trailing commas, ``"a,,b"``) are dropped:
    an empty string is a substring of every text and would otherwise count
    as a skill that every resume matches.
    """
    stripped = (skill.strip() for skill in raw_skills.split(","))
    return [skill for skill in stripped if skill]


def _score_skills(skills, resume_text):
    """Return ``(matched_fraction, missing_skills)`` for one resume.

    *resume_text* is expected to be lower-cased already; each skill is
    lower-cased before the lookup.  With no skills the fraction is ``0.0``
    instead of a division by zero.

    NOTE: matching is a plain substring test, so "java" also matches
    "javascript".
    """
    if not skills:
        return 0.0, []
    missing_skills = [skill for skill in skills if skill.lower() not in resume_text]
    matched_count = len(skills) - len(missing_skills)
    return matched_count / len(skills), missing_skills


def _read_uploaded_pdf(uploaded_file):
    """Return the lower-cased text of an uploaded PDF.

    The upload is written to a temporary file because the extractor takes a
    path.  The file is closed before it is reopened for reading and is always
    removed afterwards, even when extraction raises.
    """
    # The ".pdf" suffix keeps the file type unambiguous for the PDF reader.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        temp_pdf.write(uploaded_file.read())
        temp_pdf_name = temp_pdf.name
    try:
        return extract_text_from_pdf(temp_pdf_name).lower()
    finally:
        os.remove(temp_pdf_name)


st.title("RESUME RANKER")

# User input for skills
skills = st.text_input("Enter Skills (comma-separated):")

# User input for job description
job_description = st.text_area("Enter Job Description:")

# User input for uploading multiple PDF resumes
pdf_resumes = st.file_uploader("Upload Resumes/CVs", type=["pdf"], accept_multiple_files=True)

if st.button("Rank Resumes"):
    if not pdf_resumes:
        st.warning("Please upload PDF resumes.")
    else:
        skills = _parse_skills(skills)
        job_description = job_description.lower()

        # Extract text from every upload that reports a PDF MIME type.
        resume_data = [
            (pdf_resume.name, _read_uploaded_pdf(pdf_resume))
            for pdf_resume in pdf_resumes
            if pdf_resume.type == "application/pdf"
        ]

        if not resume_data:
            st.warning("No PDF resumes found in the uploaded files.")
        else:
            if not skills:
                st.warning("No skills entered; skills match is reported as 0%.")

            # The vectorizer is fitted on the job description only, so it is
            # the same for every resume: fit it once, outside the loop.
            tfidf_vectorizer = TfidfVectorizer()
            try:
                job_description_matrix = tfidf_vectorizer.fit_transform([job_description])
            except ValueError:
                # Raised when the text yields no tokens (empty vocabulary),
                # e.g. an empty job description.
                job_description_matrix = None
                st.warning(
                    "Job description contains no usable words; "
                    "job description match is reported as 0%."
                )

            scored_resumes = []
            for resume_name, resume_text in resume_data:
                skills_fraction, missing_skills = _score_skills(skills, resume_text)

                # Cosine similarity between the job description and the resume.
                if job_description_matrix is None:
                    description_fraction = 0.0
                else:
                    resume_matrix = tfidf_vectorizer.transform([resume_text])
                    description_fraction = float(
                        cosine_similarity(job_description_matrix, resume_matrix)[0][0]
                    )

                scored_resumes.append(
                    (
                        resume_name,
                        round(skills_fraction * 100, 2),
                        round(description_fraction * 100, 2),
                        missing_skills,
                    )
                )

            # Sort on the numeric scores, best first.  Sorting the formatted
            # "NN.N%" strings would compare them character by character and
            # place "9.0%" above "100.0%".  Job description match breaks ties.
            scored_resumes.sort(key=lambda row: (row[1], row[2]), reverse=True)

            # Format the percentages only for display, after ranking.
            resume_rankings = [
                (name, f"{skills_score}%", f"{description_score}%", missing)
                for name, skills_score, description_score, missing in scored_resumes
            ]

            # Create a DataFrame to display the results
            df = pd.DataFrame(
                resume_rankings,
                columns=["File Name", "Skills Match ", "Job Description Match ", "Missing Skills"],
            )
            st.subheader("Ranked Resumes:")
            st.dataframe(df)