import streamlit as st
from sentence_transformers import SentenceTransformer, util
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize


@st.cache_resource
def ensure_nltk_data():
    """Download the required NLTK corpora once per process.

    Streamlit re-executes this entire script on every user interaction;
    caching the download step keeps nltk.download from running on each rerun.
    """
    nltk.download("stopwords")
    # NOTE(review): newer NLTK releases may also require "punkt_tab" for
    # word_tokenize — verify against the installed nltk version.
    nltk.download("punkt")
    return True


ensure_nltk_data()

# Load English stop words
stop_words = set(stopwords.words("english"))


@st.cache_resource
def load_model():
    """Load and cache the sentence-embedding model (heavy: done once per process)."""
    return SentenceTransformer('sentence-transformers/all-mpnet-base-v2')


model = load_model()

# Synonym dictionary for common terms: canonical phrase -> accepted variant spellings.
synonyms = {
    "data analysis": {"data analytics", "data analyst"},
    "machine learning": {"ml", "artificial intelligence", "ai"},
    "programming": {"coding", "development", "software engineering"},
    "statistical analysis": {"statistics", "statistical modeling"},
    "visualization": {"data viz", "tableau", "visualizing data"},
}


def preprocess(text):
    """Lowercase, tokenize, and drop stop words / non-alphanumeric tokens.

    Returns a single space-joined string of the surviving tokens.
    """
    words = word_tokenize(text.lower())
    filtered_words = [word for word in words if word.isalnum() and word not in stop_words]
    return " ".join(filtered_words)


def _contains_term(text, term):
    """Return True if *term* occurs in *text* as a whole word/phrase.

    A plain substring test would let short variants such as "ai" or "ml"
    match inside unrelated words ("maintain", "html"); word-boundary
    anchoring prevents those false positives.
    """
    return re.search(r"\b" + re.escape(term) + r"\b", text) is not None


def synonym_match(job_desc, resume):
    """Percentage of job-description concepts (from `synonyms`) also present in the resume.

    A concept counts as present when either its canonical key or any of its
    variants appears as a whole word/phrase. Returns 0 when the job
    description mentions none of the known concepts.
    """
    match_count = 0
    total_keywords = 0
    for key, variants in synonyms.items():
        job_contains = any(_contains_term(job_desc, term) for term in variants) or _contains_term(job_desc, key)
        resume_contains = any(_contains_term(resume, term) for term in variants) or _contains_term(resume, key)
        if job_contains:
            total_keywords += 1
            # Only concepts the job actually asks for can count as matches.
            if resume_contains:
                match_count += 1
    return (match_count / total_keywords) * 100 if total_keywords > 0 else 0


def keyword_match(job_desc, resume):
    """Percentage of distinct job-description words that also appear in the resume."""
    job_keywords = set(re.findall(r'\b\w+\b', job_desc))
    resume_keywords = set(re.findall(r'\b\w+\b', resume))
    common_keywords = job_keywords.intersection(resume_keywords)
    return (len(common_keywords) / len(job_keywords)) * 100 if job_keywords else 0


st.title("Advanced Resume and Job Description Similarity Checker")
job_description = st.text_area("Paste the job description here:", height=200)
resume_text = st.text_area("Paste your resume here:", height=200)
if st.button("Compare"):
    if not (job_description.strip() and resume_text.strip()):
        st.error("Please paste both the job description and your resume to proceed.")
    else:
        # Normalize both documents before any scoring.
        clean_jd = preprocess(job_description)
        clean_cv = preprocess(resume_text)

        # Embedding-based semantic similarity, scaled to 0-100.
        jd_vec = model.encode(clean_jd)
        cv_vec = model.encode(clean_cv)
        semantic_pct = util.cos_sim(jd_vec, cv_vec).item() * 100

        # Surface-level overlap metrics on the same normalized text.
        keyword_pct = keyword_match(clean_jd, clean_cv)
        synonym_pct = synonym_match(clean_jd, clean_cv)

        # Weighted blend: semantics dominate, keyword/synonym overlap refine.
        overall_score = (semantic_pct * 0.5) + (keyword_pct * 0.3) + (synonym_pct * 0.2)
        st.write(f"**Overall Similarity Score:** {overall_score:.2f}%")

        # Tiered feedback, checked from the highest threshold down.
        feedback_tiers = [
            (80, st.success, "Excellent match! Your resume closely aligns with the job description."),
            (65, st.info, "Strong match! Your resume aligns well, but a few minor tweaks could help."),
            (50, st.warning, "Moderate match. Your resume has some relevant information, but consider emphasizing key skills."),
            (35, st.error, "Low match. Your resume does not align well. Consider revising to highlight key skills."),
        ]
        for threshold, emit, message in feedback_tiers:
            if overall_score > threshold:
                emit(message)
                break
        else:
            st.error("Very low match. Your resume is significantly different from the job description. Major revisions may be needed.")