raj22rishi committed on
Commit
d0346c6
1 Parent(s): 4726d69

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +73 -0
  2. requirements.txt +69 -0
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ from sklearn.feature_extraction.text import TfidfVectorizer
4
+ import spacy
5
+
6
# Load spaCy model.
# Streamlit re-executes this script on every widget interaction, so the
# pipeline is loaded through a cached factory to avoid reloading it each time.
@st.cache_resource
def _load_spacy_model():
    """Load and return the ``en_core_web_sm`` spaCy pipeline."""
    return spacy.load('en_core_web_sm')


nlp = _load_spacy_model()
8
+
9
def extract_text_from_pdf(file):
    """Extract the text of every page of a PDF resume.

    Args:
        file: The PDF source, handed unchanged to ``PdfReader``.

    Returns:
        str: The text of all pages concatenated in page order, with no
        separator between pages. Returns an empty string when no page
        yields any text.
    """
    pdf_reader = PdfReader(file)
    # `or ""` guards against a page whose extraction yields None (or an
    # empty value), which would otherwise break string concatenation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
16
+
17
def preprocess_text(text):
    """Normalize text with spaCy for vectorization.

    Runs ``text`` through the module-level ``nlp`` pipeline and keeps the
    lemma of every token that is alphabetic and not a stop word.

    Args:
        text: The raw text to process.

    Returns:
        str: The kept lemmas joined by single spaces.
    """
    kept_lemmas = []
    for tok in nlp(text):
        # Skip stop words and anything that is not purely alphabetic.
        if tok.is_stop or not tok.is_alpha:
            continue
        kept_lemmas.append(tok.lemma_)
    return " ".join(kept_lemmas)
22
+
23
def main():
    """Render the Streamlit page that ranks uploaded resumes.

    The page collects PDF resumes and a job description (or keywords). When
    the "Rank Resumes" button is pressed, a TF-IDF vectorizer is fitted on
    the preprocessed job description only, each resume is projected onto
    that vocabulary, and the resumes are listed from highest to lowest
    similarity score, shown as a percentage.

    Warnings are displayed (and nothing is ranked) when no resume is
    uploaded, when the job description is blank, or when the job description
    has no usable terms left after preprocessing.
    """
    st.title("Resume Ranker and Prescreening Software")
    st.write("Upload resumes (in PDF format) and enter job descriptions or keywords to filter and rank them.")

    # Upload resumes; restrict the picker to PDFs because every upload is
    # parsed with the PDF reader.
    uploaded_files = st.file_uploader(
        "Upload Resumes (PDF files)",
        type=["pdf"],
        accept_multiple_files=True,
    )

    # Input field for job description or keywords
    job_description = st.text_area("Enter Job Description or Keywords")

    if not st.button("Rank Resumes"):
        return

    if not uploaded_files:
        st.warning("Please upload one or more resumes.")
        return

    # Treat whitespace-only input the same as no input.
    if not job_description or not job_description.strip():
        st.warning("Please enter a job description or keywords.")
        return

    # Preprocess the job description
    job_description_processed = preprocess_text(job_description)

    # Fit the vocabulary on the job description alone, so resumes are scored
    # only on terms that appear in it.
    vectorizer = TfidfVectorizer()
    try:
        job_vec = vectorizer.fit_transform([job_description_processed])
    except ValueError:
        # Raised when preprocessing leaves no terms to build a vocabulary
        # from (e.g. the description contained only stop words).
        st.warning("The job description has no usable keywords after preprocessing. Please add more detail.")
        return

    # Score each resume against the job description.
    similarities = []
    for file in uploaded_files:
        text_processed = preprocess_text(extract_text_from_pdf(file))
        resume_vec = vectorizer.transform([text_processed])
        # 1x1 sparse product -> dense scalar. `.toarray()` is the documented
        # conversion; the `.A` shorthand is not available on all sparse types.
        similarity = (resume_vec @ job_vec.T).toarray()[0][0]
        similarities.append((file.name, similarity))

    # Sort resumes by similarity, best match first
    similarities.sort(key=lambda x: x[1], reverse=True)

    # Display sorted resumes with matching percentage
    st.header("Ranked Resumes")
    for resume, similarity in similarities:
        st.write(f"Resume: {resume}, Match Percentage: {similarity * 100:.2f}%")


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ altair==5.3.0
2
+ annotated-types==0.7.0
3
+ attrs==23.2.0
4
+ blinker==1.8.2
5
+ blis==0.7.11
6
+ cachetools==5.3.3
7
+ catalogue==2.0.10
8
+ certifi==2024.2.2
9
+ charset-normalizer==3.3.2
10
+ click==8.1.7
11
+ cloudpathlib==0.16.0
12
+ confection==0.1.4
13
+ cymem==2.0.8
14
+ en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
15
+ gitdb==4.0.11
16
+ GitPython==3.1.43
17
+ idna==3.7
18
+ Jinja2==3.1.4
19
+ joblib==1.4.2
20
+ jsonschema==4.22.0
21
+ jsonschema-specifications==2023.12.1
22
+ langcodes==3.4.0
23
+ language_data==1.2.0
24
+ marisa-trie==1.1.1
25
+ markdown-it-py==3.0.0
26
+ MarkupSafe==2.1.5
27
+ mdurl==0.1.2
28
+ murmurhash==1.0.10
29
+ numpy==1.26.4
30
+ packaging==24.0
31
+ pandas==2.2.2
32
+ pillow==10.3.0
33
+ preshed==3.0.9
34
+ protobuf==4.25.3
35
+ pyarrow==16.1.0
36
+ pydantic==2.7.1
37
+ pydantic_core==2.18.2
38
+ pydeck==0.9.1
39
+ Pygments==2.18.0
40
+ PyPDF2==3.0.1
41
+ python-dateutil==2.9.0.post0
42
+ pytz==2024.1
43
+ referencing==0.35.1
44
+ requests==2.32.2
45
+ rich==13.7.1
46
+ rpds-py==0.18.1
47
+ scikit-learn==1.5.0
48
+ scipy==1.13.1
49
+ six==1.16.0
50
+ smart-open==6.4.0
51
+ smmap==5.0.1
52
+ spacy==3.7.4
53
+ spacy-legacy==3.0.12
54
+ spacy-loggers==1.0.5
55
+ srsly==2.4.8
56
+ streamlit==1.35.0
57
+ tenacity==8.3.0
58
+ thinc==8.2.3
59
+ threadpoolctl==3.5.0
60
+ toml==0.10.2
61
+ toolz==0.12.1
62
+ tornado==6.4
63
+ tqdm==4.66.4
64
+ typer==0.9.4
65
+ typing_extensions==4.12.0
66
+ tzdata==2024.1
67
+ urllib3==2.2.1
68
+ wasabi==1.1.2
69
+ weasel==0.3.4