"""Streamlit app that ranks uploaded PDF resumes against job requirements."""

import os

import pdfplumber
import spacy
import streamlit as st
from fuzzywuzzy import fuzz
from sklearn.metrics.pairwise import cosine_similarity

# Load the SpaCy model once at import time; every helper below shares it.
nlp = spacy.load("en_core_web_sm")


def extract_entities(text):
    """Return a mapping of entity label -> entity text found in *text*.

    Only one text is kept per label: when the same label occurs several
    times in the document, the last occurrence overwrites earlier ones.
    """
    doc = nlp(text)
    return {ent.label_: ent.text for ent in doc.ents}


def compute_advanced_matching_score(
    cv_text,
    cv_entities,
    required_education,
    required_skills,
    required_experience,
):
    """Score how well a CV matches the requirements (higher is better).

    The score is the sum of three components:

    * education: fuzzy token-set ratio (0..1) between the CV's education
      text and ``required_education``;
    * skills: for each required skill, the best fuzzy ratio (0..1) against
      any whitespace-separated token of ``cv_text``;
    * experience: cosine similarity between the vector of the CV's ``DATE``
      entity and the vector of ``"<required_experience> years"``.

    Args:
        cv_text: Full text extracted from the CV.
        cv_entities: Label -> text mapping as returned by
            ``extract_entities``.
        required_education: Free-text education requirement.
        required_skills: Iterable of required skill strings.
        required_experience: Required experience, interpolated into
            ``"<value> years"``.

    Returns:
        The combined score as a float.
    """
    score = 0.0

    # Education: the stock English pipeline does not emit an 'EDU' label, so
    # without a fallback this component would always be 0. Use the entity
    # when a custom pipeline provides it, otherwise match the whole CV text.
    education = cv_entities.get("EDU", "") or cv_text
    score += fuzz.token_set_ratio(education, required_education) / 100

    # Skills: only the best-matching token matters, so duplicates in the CV
    # can be dropped up front without changing the result.
    cv_tokens = set(cv_text.split())
    for skill in required_skills:
        best_ratio = max(
            (fuzz.token_set_ratio(skill, token) for token in cv_tokens),
            default=0,
        )
        score += best_ratio / 100

    # Experience: compare vectors only when the CV actually yielded a DATE
    # entity and both vectors are non-empty and equally sized; otherwise
    # cosine_similarity would raise on incompatible shapes.
    experience_text = cv_entities.get("DATE", "")
    if experience_text.strip():
        cv_vector = nlp(experience_text).vector
        required_vector = nlp(f"{required_experience} years").vector
        if cv_vector.size and cv_vector.shape == required_vector.shape:
            similarity = cosine_similarity(
                cv_vector.reshape(1, -1),
                required_vector.reshape(1, -1),
            )[0][0]
            score += float(similarity)

    return score


def process_cvs(
    uploaded_files,
    required_education,
    required_skills,
    required_experience,
    top_cvs_count,
):
    """Score every uploaded PDF and return the best matches.

    Args:
        uploaded_files: Iterable of uploaded file objects exposing ``.name``
            and accepted by ``pdfplumber.open``.
        required_education: Free-text education requirement.
        required_skills: Iterable of required skill strings.
        required_experience: Required experience value.
        top_cvs_count: Maximum number of results to return.

    Returns:
        A list of ``(filename, score)`` tuples sorted by descending score,
        truncated to ``top_cvs_count`` entries. Files whose extension is not
        ``pdf`` are ignored; files sharing a name overwrite each other.
    """
    cv_scores = {}
    for uploaded_file in uploaded_files:
        # Compare case-insensitively so "resume.PDF" is not silently skipped.
        file_extension = uploaded_file.name.rsplit(".", 1)[-1].lower()
        if file_extension != "pdf":
            continue

        with pdfplumber.open(uploaded_file) as pdf:
            # extract_text() may yield None for pages without a text layer;
            # joining with a newline keeps words on adjacent pages apart.
            text = "\n".join(page.extract_text() or "" for page in pdf.pages)

        entities = extract_entities(text)
        cv_scores[uploaded_file.name] = compute_advanced_matching_score(
            text,
            entities,
            required_education,
            required_skills,
            required_experience,
        )

    ranked = sorted(cv_scores.items(), key=lambda item: item[1], reverse=True)
    return ranked[: int(top_cvs_count)]


def main():
    """Render the Streamlit UI and display the top matching resumes."""
    # NOTE(review): the markdown body is empty, so this call renders nothing;
    # the styling it was meant to inject appears to have been lost.
    st.markdown('', unsafe_allow_html=True)  # Center-align the title
    st.title("Resume Filtering App")

    uploaded_files = st.file_uploader(
        "Upload Resume Files", type=["pdf"], accept_multiple_files=True
    )
    required_education = st.text_input("Required Education")
    required_skills = st.text_input("Required Skills (comma-separated)")
    required_experience = st.text_input("Required Experience")
    top_cvs_count = st.number_input(
        "Number of Top Resume to Display", min_value=1, step=1, value=3
    )

    if not st.button("Match Resume"):
        return

    # Drop blank entries such as the one produced by a trailing comma.
    skills = [
        skill.strip() for skill in required_skills.split(",") if skill.strip()
    ]

    inputs_complete = (
        uploaded_files
        and required_education
        and skills
        and required_experience
    )
    if not inputs_complete:
        # Tell the user why nothing happened instead of failing silently.
        st.warning(
            "Please upload at least one resume and fill in all requirement "
            "fields."
        )
        return

    top_cvs = process_cvs(
        uploaded_files,
        required_education,
        skills,
        required_experience,
        top_cvs_count,
    )
    st.subheader("Top Matching Resume:")
    for filename, score in top_cvs:
        st.write(f"{filename}: {score:.2f}")


if __name__ == "__main__":
    main()