File size: 3,339 Bytes
ffd40da
 
 
 
 
 
 
 
fa1459c
 
 
 
 
 
 
 
 
 
 
ffd40da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
import streamlit as st
import pdfplumber
from fuzzywuzzy import fuzz
from sklearn.metrics.pairwise import cosine_similarity
import spacy

# Load the SpaCy model
#nlp = spacy.load("en_core_web_sm")

import spacy
def _load_spacy_pipeline(model_name):
    """Return the spaCy pipeline called *model_name*, downloading it if the first load fails.

    The first ``spacy.load`` attempt is retried once after a download when it
    raises ``OSError`` (the model is not found).
    """
    try:
        return spacy.load(model_name)
    except OSError:
        # Model not found: fetch it with spaCy's own downloader, then load again.
        from spacy.cli import download
        download(model_name)
        return spacy.load(model_name)


# Shared pipeline used by the entity-extraction and scoring functions below.
nlp = _load_spacy_pipeline("en_core_web_sm")


def extract_entities(text):
    """Run the module-level spaCy pipeline on *text* and map entity labels to entity text.

    Only one entity text is kept per label: when several entities share a
    label, later ones overwrite earlier ones, so the last occurrence wins.

    Args:
        text: Raw text to analyse.

    Returns:
        A dict mapping each entity label to the text of the last entity
        found with that label.
    """
    label_to_text = {}
    for entity in nlp(text).ents:
        # Plain assignment keeps the "last occurrence wins" behaviour.
        label_to_text[entity.label_] = entity.text
    return label_to_text

def compute_advanced_matching_score(cv_text, cv_entities, required_education, required_skills, required_experience):
    """Score a CV against the required education, skills and experience.

    The score is the sum of three independent parts:

    * Education: fuzzy token-set ratio between the CV's ``'EDU'`` entity and
      ``required_education``, divided by 100.
    * Skills: for each required skill, the best token-set ratio against any
      whitespace-separated token of ``cv_text``, divided by 100.
    * Experience: cosine similarity between the spaCy vectors of the CV's
      ``'DATE'`` entity text and of ``"<required_experience> years"``.

    Args:
        cv_text: Full text of the CV.
        cv_entities: Mapping of entity label to entity text, as produced by
            ``extract_entities``.
        required_education: Education requirement entered by the user.
        required_skills: Iterable of required skill strings.
        required_experience: Experience requirement; it is interpolated into
            ``"<value> years"`` before being vectorised.

    Returns:
        The combined score as a float. Higher means a closer match.
    """
    score = 0

    # Named Entity Recognition Matching for Education
    # NOTE(review): the stock en_core_web_sm label set does not appear to
    # include 'EDU', so this term is likely always 0 -- confirm.
    education = cv_entities.get('EDU', '')
    score += fuzz.token_set_ratio(education, required_education) / 100

    # Fuzzy Matching for Skills
    # Tokenise the CV once instead of once per required skill.
    cv_tokens = cv_text.split()
    for skill in required_skills:
        best_match = max(
            (fuzz.token_set_ratio(skill, token) for token in cv_tokens),
            default=0,
        )
        score += best_match / 100

    # Vector Similarity Matching for Experience
    experience_text = cv_entities.get('DATE', '')
    if experience_text:
        cv_vector = nlp(experience_text).vector
        required_vector = nlp(f"{required_experience} years").vector
        # An empty document can yield a zero-length vector, and
        # cosine_similarity raises on empty or mismatched feature dimensions.
        # In that case the experience term simply contributes nothing.
        if cv_vector.size and cv_vector.shape == required_vector.shape:
            similarity = cosine_similarity(
                cv_vector.reshape(1, -1), required_vector.reshape(1, -1)
            )[0][0]
            score += float(similarity)

    return score

def process_cvs(uploaded_files, required_education, required_skills, required_experience, top_cvs_count):
    """Extract text from each uploaded PDF, score it, and return the best matches.

    Files whose name does not end in a ``pdf`` extension (compared
    case-insensitively) are skipped.

    Args:
        uploaded_files: Iterable of file-like objects exposing a ``name``
            attribute and accepted by ``pdfplumber.open``.
        required_education: Education requirement passed to the scorer.
        required_skills: List of required skill strings passed to the scorer.
        required_experience: Experience requirement passed to the scorer.
        top_cvs_count: Maximum number of results to return.

    Returns:
        A list of ``(filename, score)`` tuples sorted by descending score and
        truncated to ``top_cvs_count`` entries. Scores are keyed by file name,
        so two uploads with the same name keep only the later one's score.
    """
    cv_scores = {}
    for uploaded_file in uploaded_files:
        file_extension = uploaded_file.name.split('.')[-1].lower()
        if file_extension != "pdf":
            continue

        with pdfplumber.open(uploaded_file) as pdf:
            # extract_text() may return None for a page without a text layer.
            # Joining with a newline keeps the last word of one page from
            # being glued to the first word of the next.
            text = "\n".join(page.extract_text() or '' for page in pdf.pages)

        entities = extract_entities(text)
        cv_scores[uploaded_file.name] = compute_advanced_matching_score(
            text, entities, required_education, required_skills, required_experience
        )

    ranked = sorted(cv_scores.items(), key=lambda item: item[1], reverse=True)
    return ranked[:top_cvs_count]

def main():
    """Render the Streamlit UI and show the best-matching resumes on request.

    Collects the uploaded PDF files and the education, skills and experience
    requirements. When the "Match Resume" button is pressed, the resumes are
    ranked with ``process_cvs`` and each file name is written with its score.
    If any input is missing, a warning is shown instead of silently doing
    nothing.
    """
    st.markdown('<style>h1{text-align:center;}</style>', unsafe_allow_html=True)  # Center-align the title
    st.title("Resume Filtering App")

    uploaded_files = st.file_uploader("Upload Resume Files", type=["pdf"], accept_multiple_files=True)
    required_education = st.text_input("Required Education")
    required_skills = st.text_input("Required Skills (comma-separated)")
    required_experience = st.text_input("Required Experience")
    top_cvs_count = st.number_input("Number of Top Resume to Display", min_value=1, step=1, value=3)

    if st.button("Match Resume"):
        if not (uploaded_files and required_education and required_skills and required_experience):
            # Tell the user why nothing happened rather than ignoring the click.
            st.warning("Please upload at least one resume and fill in all required fields.")
            return

        # Drop blank entries produced by stray or trailing commas.
        skills = [skill.strip() for skill in required_skills.split(',') if skill.strip()]
        top_cvs = process_cvs(uploaded_files, required_education, skills, required_experience, top_cvs_count)
        st.subheader("Top Matching Resume:")
        for filename, score in top_cvs:
            st.write(f"{filename}: {score:.2f}")


if __name__ == "__main__":
    main()