DreamStream-1 committed
Commit 7b54322 · verified · 1 Parent(s): 0068f78

Create app.py

Files changed (1)
  1. app.py +264 -0
app.py ADDED
@@ -0,0 +1,264 @@
+ import os
+ import pandas as pd
+ import google.generativeai as genai
+ import PyPDF2 as pdf
+ import io
+ import re
+ import streamlit as st
+ from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ from sklearn.metrics.pairwise import cosine_similarity
+ import torch
+
+ # Set API key for Google API (make sure it is set securely in your environment variables)
+ api_key = os.getenv('GOOGLE_API_KEY')
+ if not api_key:
+     raise ValueError("API key not found. Please set GOOGLE_API_KEY in your Hugging Face Space secrets.")
+
+ # Initialize the generative AI model
+ genai.configure(api_key=api_key)
+
+ # Load pre-trained models
+ skill_extractor = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")
+ education_extractor = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")
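+ # Note: both checkpoints are CoNLL-03 NER models that emit PER/ORG/LOC/MISC entity groups,
+ # so the 'SKILL' and 'EDUCATION' filters used further down will rarely match and the regex
+ # fallbacks end up doing most of the work.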
+
+ # Define the task and model for Hugging Face
+ task = "sentiment-analysis"
+ model_name = "roberta-base"  # Using RoBERTa
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
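+ # Note: roberta-base is not fine-tuned for sentiment analysis, so the classification head
+ # loaded here is randomly initialised and its labels are essentially arbitrary; a
+ # sentiment-tuned checkpoint would be needed for analyze_resume() to give meaningful results.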
+
+ # Extract text from uploaded PDF file
+ def input_pdf_text(uploaded_file):
+     file_stream = io.BytesIO(uploaded_file.read())
+     reader = pdf.PdfReader(file_stream)
+     text = ""
+     for page in reader.pages:
+         text += page.extract_text() or ""  # guard against pages with no extractable text
+     return text
+
+ # Extract candidate name directly from the model response
+ def extract_name_from_model_response(response_text):
+     match = re.search(r"Candidate Name:\s*(.*)", response_text)
+     if match:
+         return match.group(1)
+     return "Not Available"
+
+ # Extract email and phone numbers using regex
+ def extract_contact_info(resume_text):
+     email_match = re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", resume_text)
+     email = email_match.group(0) if email_match else "Not Available"
+
+     contact_match = re.search(r"\+?\(?\d{1,3}\)?[-.\s]?\(?\d{1,4}\)?[-.\s]?\d{3}[-.\s]?\d{4}|\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}", resume_text)
+     contact = contact_match.group(0) if contact_match else "Not Available"
+
+     return email, contact
+
+ # Extract skills using NER model
+ def extract_skills(resume_text):
+     ner_results = skill_extractor(resume_text)
+     skills = [entity['word'] for entity in ner_results if entity['entity_group'] == 'SKILL']
+     return ", ".join(skills) if skills else "Not Available"
+
+ # Extract education information using NER model
+ def extract_education(resume_text):
+     ner_results = education_extractor(resume_text)
+     education_entities = [entity['word'] for entity in ner_results if entity['entity_group'] == 'EDUCATION']
+
+     if education_entities:
+         return ", ".join(education_entities)
+     else:
+         # Fall back to regex patterns when the NER model returns nothing
+         edu_patterns = [
+             r"(Bachelor of .+|Master of .+|PhD|BSc|MSc|MBA|B\.A|M\.A|B\.Tech|M\.Tech|Doctorate|Engineering|Computer Science|Information Technology|Data Science)",
+             r"(University of [A-Za-z]+.*)"
+         ]
+         education = []
+         for pattern in edu_patterns:
+             matches = re.findall(pattern, resume_text)
+             education.extend(matches)
+
+         return ", ".join(education) if education else "Not Available"
+
+ # Extract team leadership and management years from the resume
+ def extract_experience_years(text):
+     from datetime import date  # reference year for open-ended ranges such as "2019–present"
+     years = 0
+     current_year = date.today().year
+
+     # Closed ranges, e.g. "2015-2020" or "2015 to 2020"
+     for start, end in re.findall(r"(\d{4})\s*(?:-|–|to)\s*(\d{4})", text):
+         if int(end) >= int(start):
+             years += int(end) - int(start)
+
+     # Explicit mentions, e.g. "7 years"
+     for value in re.findall(r"(\d+)\+?\s*years", text, flags=re.IGNORECASE):
+         years += int(value)
+
+     # Open-ended ranges, e.g. "since 2018" or "2019–present"
+     for match in re.findall(r"since\s+(\d{4})|(\d{4})\s*[-–]\s*present", text, flags=re.IGNORECASE):
+         start_year = int(match[0] or match[1])
+         years += max(current_year - start_year, 0)
+
+     return years
+
+ # Calculate the match percentage using TF-IDF and cosine similarity
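+ # (Cosine similarity between the two TF-IDF vectors lies in [0, 1]; scaled by 100 it is a
+ # rough keyword-overlap score rather than a semantic match.)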
+ def calculate_match_percentage(resume_text, job_description):
+     documents = [resume_text, job_description]
+     tfidf_vectorizer = TfidfVectorizer(stop_words='english')
+     tfidf_matrix = tfidf_vectorizer.fit_transform(documents)
+     cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
+     match_percentage = cosine_sim[0][0] * 100
+     return round(match_percentage, 2)
+
+ # Generate the detailed analysis from the Gemini model
+ def get_gemini_response(input_text, job_description):
+     prompt = f"""
+     Act as an Applicant Tracking System. Analyze the resume with respect to the job description.
+     Candidate Details: {input_text}
+     Job Description: {job_description}
+     Please extract the following:
+     1. Candidate Name
+     2. Relevant Skills
+     3. Educational Background
+     4. Direct Team Leadership Experience (in years)
+     5. Direct Management Experience (in years)
+     6. Match percentage with the job description
+     7. Provide a resume summary in 5 bullet points highlighting the candidate's qualifications.
+     """
+     gemini_model = genai.GenerativeModel('gemini-1.5-flash')
+     response = gemini_model.generate_content(prompt)
+     return response.text.strip()
+
+ # Extract a detailed resume summary (focusing on leadership roles and team management experience)
+ def extract_leadership_summary(response_text):
+     leadership_summary = "Resume Summary: Leadership and Team Management Experience (in years)\n"
+     lines = response_text.strip().split("\n")
+     meaningful_lines = [line.strip() for line in lines if line.strip()]
+     leadership_experience = []
+
+     for line in meaningful_lines:
+         if "leadership" in line.lower() or "management" in line.lower() or "team" in line.lower():
+             leadership_experience.append(line)
+
+     # Keep at most the last five matching lines
+     leadership_experience = leadership_experience[-5:]
+
+     for idx, bullet in enumerate(leadership_experience, 1):
+         leadership_summary += f"{idx}. {bullet}\n"
+
+     return leadership_summary
+
+ # Analyze the resume using Hugging Face RoBERTa
+ def analyze_resume(resume_text):
+     # Create input prompts for different aspects
+     prompts = [
+         f"This resume shows strong managerial responsibilities: {resume_text}",
+         f"This resume demonstrates excellent leadership skills: {resume_text}",
+         f"This resume indicates significant work experience: {resume_text}",
+         f"This resume indicates at least 2 years of relevant experience: {resume_text}"
+     ]
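+     # Note: each prompt embeds the full resume text, so anything beyond the model's
+     # 512-token limit is silently truncated and only the start of the resume is scored.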
+
+     results = []
+     for prompt in prompts:
+         # Tokenize the prompt with truncation and run the classifier without tracking gradients
+         inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
+         with torch.no_grad():
+             outputs = model(**inputs)
+         predicted_class = torch.argmax(outputs.logits).item()
+         results.append(predicted_class)
+
+     # Interpret the results
+     analysis = {
+         "managerial_responsibilities": results[0] == 1,  # Assuming 1 is positive sentiment
+         "leadership_skills": results[1] == 1,
+         "work_experience": results[2] == 1,
+         "relevant_experience": results[3] == 1
+     }
+
+     # Check if all criteria are met
+     is_suitable = all(analysis.values())
+
+     return analysis, is_suitable
+
+ # Streamlit interface to upload files and provide job description
+ st.title("Resume ATS Analysis Tool")
+ st.markdown("### Upload Resume and Job Description for Analysis")
+
+ # File uploader for resume PDF
+ uploaded_file = st.file_uploader("Upload Resume PDF", type=["pdf"])
+
+ # Job description text input
+ job_description = st.text_area("Job Description", height=200)
+
+ if uploaded_file and job_description:
+     analyze_button = st.button("Analyze")
+
+     if analyze_button:
+         resume_text = input_pdf_text(uploaded_file)
+         response_text = get_gemini_response(resume_text, job_description)
+
+         # Initialize an empty dictionary to hold the dynamic data
+         data = {}
+
+         # Extract candidate name
+         data['Candidate_Name'] = extract_name_from_model_response(response_text)
+
+         # Extract contact info (email, phone)
+         email, contact = extract_contact_info(resume_text)
+         data['Email'] = email
+         data['Contact'] = contact
+
+         # Extract skills
+         data['Skills'] = extract_skills(resume_text)
+
+         # Extract education
+         data['Education'] = extract_education(resume_text)
+
+         # Extract team leadership and management experience
+         # (the same heuristic runs on the full resume text, so both columns carry the same value)
+         experience_years = extract_experience_years(resume_text)
+         data['Team_Leadership_Experience (Years)'] = experience_years
+         data['Management_Experience (Years)'] = experience_years
+
+         # Calculate match percentage dynamically
+         match_percentage = calculate_match_percentage(resume_text, job_description)
+         data['Match_Percentage'] = match_percentage
+
+         # Derive the Job Description Match Score from the match percentage
+         if match_percentage >= 80:
+             job_description_match_score = "High"
+         elif match_percentage >= 60:
+             job_description_match_score = "Medium"
+         else:
+             job_description_match_score = "Low"
+         data['Job_Description_Match_Score'] = job_description_match_score
+
+         # Extract leadership and team management summary
+         data['Leadership_and_Team_Management_Summary'] = extract_leadership_summary(response_text)
+
+         # Analyze the resume using Hugging Face RoBERTa
+         analysis, is_suitable = analyze_resume(resume_text)
+         data['Managerial_Responsibilities'] = analysis['managerial_responsibilities']
+         data['Leadership_Skills'] = analysis['leadership_skills']
+         data['Work_Experience'] = analysis['work_experience']
+         data['Relevant_Experience'] = analysis['relevant_experience']
+         data['Suitable_for_Role'] = is_suitable
+
+         # Display the results as a table
+         df = pd.DataFrame([data])
+         st.write(df)
+
+         # Download the results as a CSV file
+         csv = df.to_csv(index=False)
+         st.download_button(
+             label="Download Results as CSV",
+             data=csv,
+             file_name='resume_analysis_results.csv',
+             mime='text/csv'
+         )
+
+ else:
+     st.write("Please upload a resume and provide a job description to analyze.")