Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,198 +1,91 @@
|
|
1 |
-
import
|
2 |
-
import pandas as pd
|
3 |
-
import google.generativeai as genai
|
4 |
-
import PyPDF2 as pdf
|
5 |
-
import io
|
6 |
-
import re
|
7 |
import streamlit as st
|
8 |
-
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
|
9 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
10 |
from sklearn.metrics.pairwise import cosine_similarity
|
11 |
-
import
|
|
|
|
|
|
|
|
|
12 |
|
13 |
-
#
|
14 |
-
|
15 |
-
|
16 |
-
raise ValueError("API key not found. Please set GOOGLE_API_KEY as an environment variable.")
|
17 |
|
18 |
-
#
|
19 |
-
|
20 |
|
21 |
-
#
|
22 |
-
|
23 |
-
education_extractor = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")
|
24 |
|
25 |
-
#
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
#
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
Args:
|
71 |
-
text: Resume text.
|
72 |
-
Returns:
|
73 |
-
str: Comma-separated skills or "Not Available".
|
74 |
-
"""
|
75 |
-
ner_results = skill_extractor(text)
|
76 |
-
skills = [entity['word'] for entity in ner_results if entity['entity_group'] == 'SKILL']
|
77 |
-
return ", ".join(skills) if skills else "Not Available"
|
78 |
-
|
79 |
-
# Function to extract education details
|
80 |
-
# Single alternation so text consumed by a degree phrase ("Bachelor of
# Engineering") is not matched again by a shorter keyword ("Engineering").
# Dots in the abbreviations are escaped so "B.A" no longer matches "BOA", and
# word boundaries stop abbreviations from matching inside longer words.
# Degree phrases stop at a comma/semicolon/newline so that the remainder of
# the line (institution, year, ...) is not swallowed into the match.
_EDUCATION_DEGREE_RE = re.compile(
    r"\b(?:(?:Bachelor|Master) of [^,;\n]+"
    r"|(?:PhD|BSc|MSc|MBA|B\.A|M\.A|B\.Tech|M\.Tech|Engineering|Data Science)\b)"
)
_EDUCATION_UNIVERSITY_RE = re.compile(r"\bUniversity of [A-Za-z]+")


def extract_education(text):
    """
    Extract education information using NER, falling back to regex.

    The module-level ``education_extractor`` callable is run over ``text``
    first; entities whose ``entity_group`` is ``'EDUCATION'`` are returned if
    any exist. Otherwise degree names and "University of X" phrases are
    collected with regular expressions.

    Args:
        text: Resume text.
    Returns:
        str: Comma-separated education details, or "Not Available" when
        neither the NER pass nor the regex fallback finds anything.
    """
    ner_results = education_extractor(text)
    # NOTE(review): a CoNLL-03 NER model presumably only emits PER/ORG/LOC/MISC
    # groups, in which case this filter never matches and the regex fallback
    # always runs -- confirm against the configured model.
    education_entities = [
        entity['word']
        for entity in ner_results
        if entity['entity_group'] == 'EDUCATION'
    ]
    if education_entities:
        return ", ".join(education_entities)

    matches = []
    for pattern in (_EDUCATION_DEGREE_RE, _EDUCATION_UNIVERSITY_RE):
        matches.extend(found.strip() for found in pattern.findall(text))

    # Drop repeats while keeping first-seen order (dicts preserve insertion order).
    unique_matches = [m for m in dict.fromkeys(matches) if m]
    return ", ".join(unique_matches) if unique_matches else "Not Available"
|
103 |
-
|
104 |
-
# Function to calculate match percentage using TF-IDF
|
105 |
-
def calculate_match_percentage(resume_text, job_description):
    """
    Calculate the match percentage using TF-IDF and cosine similarity.

    Both texts are vectorised together with an English stop-word filter and
    the cosine similarity of the two resulting vectors is scaled to 0-100.

    Args:
        resume_text: Resume text.
        job_description: Job description.
    Returns:
        float: Match percentage (0-100), rounded to two decimals. Returns 0.0
        when either text is missing/blank or when no usable vocabulary
        remains after stop-word removal.
    """
    # Blank or missing input cannot match anything; bail out before the
    # vectoriser raises on an empty vocabulary.
    if not resume_text or not resume_text.strip():
        return 0.0
    if not job_description or not job_description.strip():
        return 0.0

    documents = [resume_text, job_description]
    tfidf_vectorizer = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = tfidf_vectorizer.fit_transform(documents)
    except ValueError:
        # Raised by scikit-learn when the documents contain only stop words
        # (empty vocabulary); treat that as "no overlap".
        return 0.0

    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    # float() turns the NumPy scalar into a plain Python float for callers
    # that serialise or display the value.
    return round(float(cosine_sim[0][0]) * 100, 2)
|
120 |
|
121 |
-
|
122 |
-
def analyze_with_gemini(resume_text, job_description):
|
123 |
-
"""
|
124 |
-
Use Gemini Flash 1.5 to generate an ATS analysis.
|
125 |
|
126 |
-
|
127 |
-
|
128 |
-
job_description: Job description content.
|
129 |
-
Returns:
|
130 |
-
str: AI-generated analysis.
|
131 |
-
"""
|
132 |
prompt = f"""
|
133 |
-
|
|
|
134 |
Resume: {resume_text}
|
135 |
Job Description: {job_description}
|
136 |
-
|
137 |
-
|
138 |
-
- Skills
|
139 |
-
- Education
|
140 |
-
- Leadership Experience (years)
|
141 |
-
- Match Percentage
|
142 |
-
Provide a summary of the candidate's strengths in bullet points.
|
143 |
"""
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
|
|
|
|
154 |
|
155 |
-
# Streamlit Interface
# Page flow: collect a PDF resume and a job description, then on "Analyze"
# run every extractor and show/download the combined results as one row.
st.title("Resume ATS Analysis Tool")
st.markdown("### Upload Resume PDF and Enter Job Description for Analysis")

uploaded_file = st.file_uploader("Upload Resume (PDF format)", type=["pdf"])
job_description = st.text_area("Job Description", height=200)

if uploaded_file and job_description.strip():
    if st.button("Analyze"):
        resume_text = extract_pdf_text(uploaded_file)
        if not resume_text:
            st.error("No text extracted from PDF. Please upload a valid file.")
            # Halt this script run; nothing below can work without text.
            st.stop()

        # Extract candidate details
        email, phone = extract_contact_info(resume_text)
        skills = extract_skills(resume_text)
        education = extract_education(resume_text)
        match_percentage = calculate_match_percentage(resume_text, job_description)
        gemini_analysis = analyze_with_gemini(resume_text, job_description)

        # Prepare the results
        results = {
            "Email": email,
            "Contact": phone,
            "Skills": skills,
            "Education": education,
            "Match Percentage": match_percentage,
            "Gemini Analysis": gemini_analysis,
        }

        # Build the single-row table once and reuse it for display and export.
        results_df = pd.DataFrame([results])

        # Display results
        st.write(results_df)

        # Allow download as CSV
        csv_data = results_df.to_csv(index=False)
        st.download_button(
            label="Download Results as CSV",
            data=csv_data,
            file_name="resume_analysis_results.csv",
            mime="text/csv",
        )
else:
    st.info("Upload a resume and provide a job description to start the analysis.")
|
|
|
1 |
+
import spacy
|
|
|
|
|
|
|
|
|
|
|
2 |
import streamlit as st
|
|
|
3 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
4 |
from sklearn.metrics.pairwise import cosine_similarity
|
5 |
+
import PyPDF2
|
6 |
+
import nltk
|
7 |
+
from nltk.corpus import stopwords
|
8 |
+
from nltk.tokenize import word_tokenize
|
9 |
+
from gemini_flash import GeminiFlash # Assuming Gemini Flash is installed
|
10 |
|
11 |
+
# Ensure that NLTK's tokenizer data and stopword list are available
nltk.download('punkt')
nltk.download('stopwords')

# Load spaCy model for NER
nlp = spacy.load("en_core_web_sm")

# Initialize Gemini Flash for prompt engineering
# NOTE(review): `gemini_flash.GeminiFlash` and its `refine_prompt` method are
# assumed to exist (see the import comment) -- verify the package is real and
# installed in the deployment environment.
prompt_engineer = GeminiFlash()

# Streamlit Interface
st.title("AI Resume and Job Description Analyzer")

# Step 1: Resume Upload
uploaded_file = st.file_uploader("Upload Resume (PDF)", type="pdf")

# Defined before the upload check so Steps 3-5 can test it safely on the
# first page load, when no file has been uploaded yet.
resume_text = ""

if uploaded_file is not None:
    # Read the PDF file; a page that yields no text contributes "" so the
    # join never sees None.
    pdf_reader = PyPDF2.PdfReader(uploaded_file)
    resume_text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # Display the resume text
    st.text_area("Resume Text", resume_text, height=300)

# Step 2: Job Description Input
job_description = st.text_area("Enter Job Description")

if job_description:
    # Preprocess job description using NLTK: drop stopwords, then stem
    stop_words = set(stopwords.words("english"))
    tokens = word_tokenize(job_description)
    filtered_tokens = [word for word in tokens if word.lower() not in stop_words]
    ps = nltk.PorterStemmer()
    stemmed_tokens = [ps.stem(word) for word in filtered_tokens]
    preprocessed_job_description = " ".join(stemmed_tokens)

    # Display preprocessed job description
    st.text_area("Processed Job Description", preprocessed_job_description)

# Step 3: Named Entity Recognition (NER) on Resume
if resume_text:
    doc = nlp(resume_text)
    entities = [(ent.text, ent.label_) for ent in doc.ents]

    # Display extracted entities
    st.subheader("Named Entities from Resume")
    st.write(entities)

# Step 4: Candidate-Job Relevance Using Cosine Similarity
# The raw (not the stemmed) job description is compared with the resume.
if resume_text and job_description:
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([job_description, resume_text])
    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])

    st.write(f"Cosine Similarity (Relevance): {cosine_sim[0][0]:.2f}")

# Step 5: Gemini Flash - Prompt Engineering (using Gemini Flash to craft relevant prompts for an LLM)
if resume_text and job_description:
    prompt = f"""
    Given the resume text and job description, evaluate how well the candidate's qualifications match the job requirements.

    Resume: {resume_text}
    Job Description: {job_description}

    Based on the information provided, generate a detailed match score between the candidate and the job.
    """

    # Use Gemini Flash to refine and enhance the prompt (assuming Gemini Flash enhances the prompt)
    enhanced_prompt = prompt_engineer.refine_prompt(prompt)

    # Display the enhanced prompt (for debugging or transparency)
    st.subheader("Enhanced Prompt for LLM")
    st.write(enhanced_prompt)

    # Here you would typically pass the `enhanced_prompt` to a large language model (LLM) API or model for evaluation
    # For demonstration purposes, assume a function `get_llm_response` exists that interacts with a model.
    # response = get_llm_response(enhanced_prompt)
    # st.write("LLM Response:", response)
|
91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|