import streamlit as st
from sentence_transformers import SentenceTransformer, util
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize


@st.cache_resource
def ensure_nltk_data():
    """Download the required NLTK corpora once per process.

    Streamlit re-executes this entire script on every user interaction;
    caching the download step keeps nltk.download from running on each rerun.
    """
    nltk.download("stopwords")
    # NOTE(review): newer NLTK releases may also require "punkt_tab" for
    # word_tokenize — verify against the installed nltk version.
    nltk.download("punkt")
    return True


ensure_nltk_data()

# Load English stop words
stop_words = set(stopwords.words("english"))


@st.cache_resource
def load_model():
    """Load and cache the sentence-embedding model (heavy: done once per process)."""
    return SentenceTransformer('sentence-transformers/all-mpnet-base-v2')


model = load_model()

# Synonym dictionary for common terms: canonical phrase -> accepted variant spellings.
synonyms = {
    "data analysis": {"data analytics", "data analyst"},
    "machine learning": {"ml", "artificial intelligence", "ai"},
    "programming": {"coding", "development", "software engineering"},
    "statistical analysis": {"statistics", "statistical modeling"},
    "visualization": {"data viz", "tableau", "visualizing data"},
}


def preprocess(text):
    """Lowercase, tokenize, and drop stop words / non-alphanumeric tokens.

    Returns a single space-joined string of the surviving tokens.
    """
    words = word_tokenize(text.lower())
    filtered_words = [word for word in words if word.isalnum() and word not in stop_words]
    return " ".join(filtered_words)


def _contains_term(text, term):
    """Return True if *term* occurs in *text* as a whole word/phrase.

    A plain substring test would let short variants such as "ai" or "ml"
    match inside unrelated words ("maintain", "html"); word-boundary
    anchoring prevents those false positives.
    """
    return re.search(r"\b" + re.escape(term) + r"\b", text) is not None


def synonym_match(job_desc, resume):
    """Percentage of job-description concepts (from `synonyms`) also present in the resume.

    A concept counts as present when either its canonical key or any of its
    variants appears as a whole word/phrase. Returns 0 when the job
    description mentions none of the known concepts.
    """
    match_count = 0
    total_keywords = 0
    for key, variants in synonyms.items():
        job_contains = any(_contains_term(job_desc, term) for term in variants) or _contains_term(job_desc, key)
        resume_contains = any(_contains_term(resume, term) for term in variants) or _contains_term(resume, key)
        if job_contains:
            total_keywords += 1
            # Only concepts the job actually asks for can count as matches.
            if resume_contains:
                match_count += 1
    return (match_count / total_keywords) * 100 if total_keywords > 0 else 0


def keyword_match(job_desc, resume):
    """Percentage of distinct job-description words that also appear in the resume."""
    job_keywords = set(re.findall(r'\b\w+\b', job_desc))
    resume_keywords = set(re.findall(r'\b\w+\b', resume))
    common_keywords = job_keywords.intersection(resume_keywords)
    return (len(common_keywords) / len(job_keywords)) * 100 if job_keywords else 0


st.title("Advanced Resume and Job Description Similarity Checker")
job_description = st.text_area("Paste the job description here:", height=200)
resume_text = st.text_area("Paste your resume here:", height=200)
if st.button("Compare"):
    if not (job_description.strip() and resume_text.strip()):
        st.error("Please paste both the job description and your resume to proceed.")
    else:
        # Normalize both documents before any scoring.
        clean_jd = preprocess(job_description)
        clean_cv = preprocess(resume_text)

        # Embedding-based semantic similarity, scaled to 0-100.
        jd_vec = model.encode(clean_jd)
        cv_vec = model.encode(clean_cv)
        semantic_pct = util.cos_sim(jd_vec, cv_vec).item() * 100

        # Surface-level overlap metrics on the same normalized text.
        keyword_pct = keyword_match(clean_jd, clean_cv)
        synonym_pct = synonym_match(clean_jd, clean_cv)

        # Weighted blend: semantics dominate, keyword/synonym overlap refine.
        overall_score = (semantic_pct * 0.5) + (keyword_pct * 0.3) + (synonym_pct * 0.2)
        st.write(f"**Overall Similarity Score:** {overall_score:.2f}%")

        # Tiered feedback, checked from the highest threshold down.
        feedback_tiers = [
            (80, st.success, "Excellent match! Your resume closely aligns with the job description."),
            (65, st.info, "Strong match! Your resume aligns well, but a few minor tweaks could help."),
            (50, st.warning, "Moderate match. Your resume has some relevant information, but consider emphasizing key skills."),
            (35, st.error, "Low match. Your resume does not align well. Consider revising to highlight key skills."),
        ]
        for threshold, emit, message in feedback_tiers:
            if overall_score > threshold:
                emit(message)
                break
        else:
            st.error("Very low match. Your resume is significantly different from the job description. Major revisions may be needed.")