Spaces:
Runtime error
Runtime error
raj22rishi
committed on
Commit
•
d0346c6
1
Parent(s):
4726d69
Upload 2 files
Browse files- app.py +73 -0
- requirements.txt +69 -0
app.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from PyPDF2 import PdfReader
|
3 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
4 |
+
import spacy
|
5 |
+
|
6 |
+
# Load spaCy model once at module import time (shared by all requests).
# Requires the `en_core_web_sm` model to be installed; raises OSError otherwise.
nlp = spacy.load('en_core_web_sm')
|
8 |
+
|
9 |
+
# Function to extract text from PDF resumes
def extract_text_from_pdf(file):
    """Extract the concatenated text of every page in a PDF.

    Args:
        file: A binary file-like object readable by PyPDF2 (e.g. a
            Streamlit ``UploadedFile``).

    Returns:
        str: All page text joined together; an empty string when no
        text can be extracted.
    """
    pdf_reader = PdfReader(file)
    # extract_text() may return None for image-only or unparseable pages;
    # the original `text += page.extract_text()` raised TypeError there.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
|
16 |
+
|
17 |
+
# Function to preprocess text using spaCy
def preprocess_text(text):
    """Return *text* reduced to space-joined lemmas.

    Stop words and non-alphabetic tokens are discarded; remaining
    tokens are replaced by their spaCy lemmas.
    """
    lemmas = []
    for token in nlp(text):
        # Skip noise: stop words, numbers, punctuation, symbols.
        if token.is_stop or not token.is_alpha:
            continue
        lemmas.append(token.lemma_)
    return " ".join(lemmas)
|
22 |
+
|
23 |
+
# Main function to create the Streamlit app
def main():
    """Streamlit entry point: rank uploaded PDF resumes against a job description.

    The TF-IDF vectorizer is fitted on the preprocessed job description
    only, so scoring is keyword-based: each resume's score is the cosine
    similarity between its vector and the job-description vector.
    """
    st.title("Resume Ranker and Prescreening Software")
    st.write("Upload resumes (in PDF format) and enter job descriptions or keywords to filter and rank them.")

    # Upload resumes
    uploaded_files = st.file_uploader("Upload Resumes (PDF files)", accept_multiple_files=True)

    # Input field for job description or keywords
    job_description = st.text_area("Enter Job Description or Keywords")

    if st.button("Rank Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resumes.")
            return

        if not job_description:
            st.warning("Please enter a job description or keywords.")
            return

        # Preprocess the job description
        job_description_processed = preprocess_text(job_description)
        if not job_description_processed:
            # All tokens were stop words / non-alphabetic; fitting TF-IDF on
            # an empty string raises ValueError("empty vocabulary").
            st.warning("The job description contains no usable keywords. Please add more specific terms.")
            return

        # Vectorize job description and resumes
        vectorizer = TfidfVectorizer()
        job_vec = vectorizer.fit_transform([job_description_processed])
        resume_vecs = []
        for file in uploaded_files:
            # Parse and preprocess each resume, then project it into the
            # job-description vocabulary.
            text = extract_text_from_pdf(file)
            text_processed = preprocess_text(text)
            resume_vecs.append(vectorizer.transform([text_processed]))

        # Calculate similarity scores. TfidfVectorizer L2-normalizes rows by
        # default, so the dot product of the two vectors is the cosine
        # similarity in [0, 1].
        similarities = []
        for i, resume_vec in enumerate(resume_vecs):
            # .toarray() replaces the sparse-matrix .A attribute, which was
            # deprecated and removed in SciPy 1.14.
            similarity = (resume_vec @ job_vec.T).toarray()[0][0]
            similarities.append((uploaded_files[i].name, similarity))

        # Sort resumes by similarity, best match first
        similarities.sort(key=lambda x: x[1], reverse=True)

        # Display sorted resumes with matching percentage
        st.header("Ranked Resumes")
        for resume, similarity in similarities:
            st.write(f"Resume: {resume}, Match Percentage: {similarity * 100:.2f}%")
71 |
+
|
72 |
# Run the app only when this file is executed directly (not on import).
if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
altair==5.3.0
|
2 |
+
annotated-types==0.7.0
|
3 |
+
attrs==23.2.0
|
4 |
+
blinker==1.8.2
|
5 |
+
blis==0.7.11
|
6 |
+
cachetools==5.3.3
|
7 |
+
catalogue==2.0.10
|
8 |
+
certifi==2024.2.2
|
9 |
+
charset-normalizer==3.3.2
|
10 |
+
click==8.1.7
|
11 |
+
cloudpathlib==0.16.0
|
12 |
+
confection==0.1.4
|
13 |
+
cymem==2.0.8
|
14 |
+
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
|
15 |
+
gitdb==4.0.11
|
16 |
+
GitPython==3.1.43
|
17 |
+
idna==3.7
|
18 |
+
Jinja2==3.1.4
|
19 |
+
joblib==1.4.2
|
20 |
+
jsonschema==4.22.0
|
21 |
+
jsonschema-specifications==2023.12.1
|
22 |
+
langcodes==3.4.0
|
23 |
+
language_data==1.2.0
|
24 |
+
marisa-trie==1.1.1
|
25 |
+
markdown-it-py==3.0.0
|
26 |
+
MarkupSafe==2.1.5
|
27 |
+
mdurl==0.1.2
|
28 |
+
murmurhash==1.0.10
|
29 |
+
numpy==1.26.4
|
30 |
+
packaging==24.0
|
31 |
+
pandas==2.2.2
|
32 |
+
pillow==10.3.0
|
33 |
+
preshed==3.0.9
|
34 |
+
protobuf==4.25.3
|
35 |
+
pyarrow==16.1.0
|
36 |
+
pydantic==2.7.1
|
37 |
+
pydantic_core==2.18.2
|
38 |
+
pydeck==0.9.1
|
39 |
+
Pygments==2.18.0
|
40 |
+
PyPDF2==3.0.1
|
41 |
+
python-dateutil==2.9.0.post0
|
42 |
+
pytz==2024.1
|
43 |
+
referencing==0.35.1
|
44 |
+
requests==2.32.2
|
45 |
+
rich==13.7.1
|
46 |
+
rpds-py==0.18.1
|
47 |
+
scikit-learn==1.5.0
|
48 |
+
scipy==1.13.1
|
49 |
+
six==1.16.0
|
50 |
+
smart-open==6.4.0
|
51 |
+
smmap==5.0.1
|
52 |
+
spacy==3.7.4
|
53 |
+
spacy-legacy==3.0.12
|
54 |
+
spacy-loggers==1.0.5
|
55 |
+
srsly==2.4.8
|
56 |
+
streamlit==1.35.0
|
57 |
+
tenacity==8.3.0
|
58 |
+
thinc==8.2.3
|
59 |
+
threadpoolctl==3.5.0
|
60 |
+
toml==0.10.2
|
61 |
+
toolz==0.12.1
|
62 |
+
tornado==6.4
|
63 |
+
tqdm==4.66.4
|
64 |
+
typer==0.9.4
|
65 |
+
typing_extensions==4.12.0
|
66 |
+
tzdata==2024.1
|
67 |
+
urllib3==2.2.1
|
68 |
+
wasabi==1.1.2
|
69 |
+
weasel==0.3.4
|