DreamStream-1 committed on
Commit
3878720
·
verified ·
1 Parent(s): 567834b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -160
app.py CHANGED
@@ -1,23 +1,17 @@
1
- import os
2
- import pandas as pd
3
- import google.generativeai as genai
4
- import PyPDF2 as pdf
5
- import io
6
  import re
7
- import streamlit as st
8
- from transformers import pipeline
 
9
  from sklearn.feature_extraction.text import TfidfVectorizer
10
  from sklearn.metrics.pairwise import cosine_similarity
 
 
11
 
12
- # Set API key for Google API (Make sure it's securely set in your environment variables)
13
- api_key = os.getenv('GOOGLE_API_KEY')
14
- if not api_key:
15
- raise ValueError("API key not found. Please set GOOGLE_API_KEY in your Hugging Face Space secrets.")
16
-
17
- # Initialize the generative AI model
18
  genai.configure(api_key=api_key)
19
 
20
- # Load pre-trained models
21
  skill_extractor = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")
22
  education_extractor = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")
23
 
@@ -30,41 +24,31 @@ def input_pdf_text(uploaded_file):
30
  text += page.extract_text()
31
  return text
32
 
33
- # Extract candidate name directly from the model response
34
- def extract_name_from_model_response(response_text):
35
- match = re.search(r"Candidate Name:\s*(.*)", response_text)
36
- if match:
37
- return match.group(1)
38
- return "Not Available"
39
-
40
- # Extract email and phone numbers using regex
41
  def extract_contact_info(resume_text):
42
  email_match = re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", resume_text)
 
43
  email = email_match.group(0) if email_match else "Not Available"
 
 
44
 
45
- contact_match = re.search(r"\+?\(?\d{1,3}\)?[-.\s]?\(?\d{1,4}\)?[-.\s]?\d{3}[-.\s]?\d{4}|\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}", resume_text)
46
- contact = contact_match.group(0) if contact_match else "Not Available"
47
-
48
- return email, contact
49
-
50
- # Extract skills using NER model
51
  def extract_skills(resume_text):
52
  ner_results = skill_extractor(resume_text)
53
  skills = [entity['word'] for entity in ner_results if entity['entity_group'] == 'SKILL']
54
  return ", ".join(skills) if skills else "Not Available"
55
 
56
- # Extract education information using NER model
57
  def extract_education(resume_text):
58
  ner_results = education_extractor(resume_text)
59
  education_entities = [entity['word'] for entity in ner_results if entity['entity_group'] == 'EDUCATION']
60
 
61
- # If no direct education found, fall back to regex-based extraction
62
  if education_entities:
63
  return ", ".join(education_entities)
64
  else:
65
- # Using regex to capture education background
66
  edu_patterns = [
67
- r"(Bachelor of .+|Master of .+|PhD|BSc|MSc|MBA|B.A|M.A|B.Tech|M.Tech|Doctorate|Engineering|Computer Science|Information Technology|Data Science)",
68
  r"(University of [A-Za-z]+.*)"
69
  ]
70
  education = []
@@ -74,141 +58,53 @@ def extract_education(resume_text):
74
 
75
  return ", ".join(education) if education else "Not Available"
76
 
77
- # Extract team leadership and management years from the resume
78
- def extract_experience_years(text):
79
- years = 0
80
- patterns = [
81
- r"(\d{4})\s?[-to]+\s?(\d{4})", # From year to year
82
- r"(\d+) years", # Exact mention of years
83
- r"since (\d{4})", # Mentions "since"
84
- r"(\d+)\s?[\-–]\s?(\d+)", # Handles year ranges with hyphens (e.g., 2015-2020)
85
- r"(\d+)\s?[\–]\s?present", # Present with range (e.g., 2019–present)
86
- ]
87
-
88
- for pattern in patterns:
89
- matches = re.findall(pattern, text)
90
- for match in matches:
91
- if len(match) == 2:
92
- start_year = int(match[0])
93
- end_year = int(match[1])
94
- years += end_year - start_year
95
- elif len(match) == 1:
96
- years += int(match[0])
97
-
98
- return years
99
-
100
- # Calculate the match percentage using TF-IDF and cosine similarity
101
  def calculate_match_percentage(resume_text, job_description):
102
- # Combine resume and job description into a list
103
  documents = [resume_text, job_description]
104
-
105
- # Use TF-IDF to convert the documents into vectors
106
  tfidf_vectorizer = TfidfVectorizer(stop_words='english')
107
  tfidf_matrix = tfidf_vectorizer.fit_transform(documents)
108
-
109
- # Compute cosine similarity between resume and job description
110
  cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
 
111
 
112
- # Return match percentage based on cosine similarity score
113
- match_percentage = cosine_sim[0][0] * 100
114
- return round(match_percentage, 2)
115
-
116
- # Generate the detailed analysis from the Gemini model
117
- def get_gemini_response(input_text, job_description):
118
- prompt = f"""
119
- Act as an Applicant Tracking System. Analyze the resume with respect to the job description.
120
- Candidate Details: {input_text}
121
- Job Description: {job_description}
122
- Please extract the following:
123
- 1. Candidate Name
124
- 2. Relevant Skills
125
- 3. Educational Background
126
- 4. Direct Team Leadership Experience (in years)
127
- 5. Direct Management Experience (in years)
128
- 6. Match percentage with the job description
129
- 7. Provide a resume summary in 5 bullet points highlighting the key details about the candidate
130
- """
131
  model = genai.GenerativeModel('gemini-1.5-flash')
132
  response = model.generate_content(prompt)
133
  return response.text.strip()
134
 
135
- # Streamlit interface to upload files and provide job description
136
- st.title("Resume ATS Analysis Tool")
137
- st.markdown("### Upload Resume and Job Description for Analysis")
138
-
139
- # File uploader for resume PDF
140
- uploaded_file = st.file_uploader("Upload Resume PDF", type=["pdf"])
141
-
142
- # Job description text input
143
- job_description = st.text_area("Job Description", height=200)
144
-
145
- if uploaded_file and job_description:
146
- analyze_button = st.button("Analyze")
147
-
148
- if analyze_button:
149
- # Extract text from the uploaded PDF
150
- resume_text = input_pdf_text(uploaded_file)
151
-
152
- # Fetch analysis from the AI model
153
- response_text = get_gemini_response(resume_text, job_description)
154
-
155
- # Initialize an empty dictionary to hold the dynamic data
156
- data = {}
157
-
158
- # Extract candidate name
159
- name = extract_name_from_model_response(response_text)
160
- data['Candidate_Name'] = name if name != "Not Available" else "Not Available"
161
-
162
- # Extract contact info (email, phone)
163
- email, contact = extract_contact_info(resume_text)
164
- data['Email'] = email if email != "Not Available" else "Not Available"
165
- data['Contact'] = contact if contact != "Not Available" else "Not Available"
166
-
167
- # Extract team leadership and management experience
168
- team_leadership_years = extract_experience_years(resume_text)
169
- management_years = extract_experience_years(resume_text)
170
- data['Direct_Team_Leadership_Experience_Years'] = team_leadership_years if team_leadership_years > 0 else "Not Available"
171
- data['Direct_Management_Experience_Years'] = management_years if management_years > 0 else "Not Available"
172
-
173
- # Extract skills using the NER model
174
- relevant_skills = extract_skills(resume_text)
175
- data['Relevant_Skills_and_Qualifications'] = relevant_skills if relevant_skills != "Not Available" else "Not Available"
176
-
177
- # Extract education using the NER model or regex
178
- educational_background = extract_education(resume_text)
179
- data['Educational_Background'] = educational_background if educational_background != "Not Available" else "Not Available"
180
-
181
- # Calculate match percentage dynamically
182
- match_percentage = calculate_match_percentage(resume_text, job_description)
183
- data['Match_Percentage'] = match_percentage
184
-
185
- # Calculate Job Description Match Score dynamically (based on match percentage)
186
- if match_percentage >= 80:
187
- job_description_match_score = "High"
188
- elif match_percentage >= 60:
189
- job_description_match_score = "Medium"
190
- else:
191
- job_description_match_score = "Low"
192
- data['Job_Description_Match_Score'] = job_description_match_score
193
-
194
- # Extract resume summary from the model response (5 bullet points)
195
- bullet_summary = "Resume Summary (5 Bullet Points):\n"
196
- bullets = response_text.split("\n")[-5:] # Assuming the last 5 lines are the bullet points
197
- for idx, bullet in enumerate(bullets):
198
- bullet_summary += f"{idx+1}. {bullet}\n"
199
- data['Resume_Summary'] = bullet_summary
200
-
201
- # Prepare DataFrame for output
202
- df = pd.DataFrame([data])
203
-
204
- # Show the result table
205
- st.write(df)
206
-
207
- # Provide a download button for the CSV file
208
- csv = df.to_csv(index=False)
209
- st.download_button(
210
- label="Download Analysis as CSV",
211
- data=csv,
212
- file_name="resume_analysis.csv",
213
- mime="text/csv"
214
- )
 
 
 
 
 
 
1
  import re
2
+ import io
3
+ import PyPDF2 as pdf
4
+ import pandas as pd
5
  from sklearn.feature_extraction.text import TfidfVectorizer
6
  from sklearn.metrics.pairwise import cosine_similarity
7
+ from transformers import pipeline
8
+ import google.generativeai as genai
9
 
10
# Initialize the Google Gemini client used for summarization.
# The key is read from the GOOGLE_API_KEY environment variable so a real secret
# never has to be written into the source file. The original placeholder is
# kept as the fallback, so importing the module behaves as before when the
# variable is unset or empty.
import os

api_key = os.getenv('GOOGLE_API_KEY') or 'YOUR_GOOGLE_API_KEY'
genai.configure(api_key=api_key)

# Set up NER pipelines for skill and education extraction
skill_extractor = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")
education_extractor = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")
17
 
 
24
  text += page.extract_text()
25
  return text
26
 
27
# Extract contact information using regular expressions (email, phone)
# Patterns are compiled once at import time instead of on every call.
_CONTACT_EMAIL_RE = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
_CONTACT_PHONE_RE = re.compile(
    r"\+?\(?\d{1,3}\)?[-.\s]?\(?\d{1,4}\)?[-.\s]?\d{3}[-.\s]?\d{4}"
    r"|\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}"
)


def extract_contact_info(resume_text):
    """Return the first e-mail address and phone number found in the resume.

    Args:
        resume_text: Plain text of the resume. ``None`` or an empty string is
            accepted and treated as "nothing found".

    Returns:
        A ``(email, phone)`` tuple of strings. Each element is the first
        regex match in ``resume_text`` or ``"Not Available"`` when there is
        no match.
    """
    # Guard first: re.search() raises TypeError on None.
    if not resume_text:
        return "Not Available", "Not Available"

    email_match = _CONTACT_EMAIL_RE.search(resume_text)
    phone_match = _CONTACT_PHONE_RE.search(resume_text)
    email = email_match.group(0) if email_match else "Not Available"
    phone = phone_match.group(0) if phone_match else "Not Available"
    return email, phone
34
 
35
# Extract skills using the NER model
def extract_skills(resume_text):
    """Return the skills the NER pipeline tags in the resume, comma-separated.

    Args:
        resume_text: Plain text of the resume. ``None``, empty, or
            whitespace-only text short-circuits without calling the model.

    Returns:
        A ``", "``-joined string of the distinct entity words whose
        ``entity_group`` is ``'SKILL'``, in first-seen order, or
        ``"Not Available"`` when there are none.
    """
    # Nothing to analyse: skip the (expensive) model call entirely.
    if not resume_text or not resume_text.strip():
        return "Not Available"

    ner_results = skill_extractor(resume_text)
    # NOTE(review): the dslim/bert-base-NER model card lists only the
    # LOC / ORG / PER / MISC labels, so this 'SKILL' filter presumably never
    # matches and the function always reports "Not Available" — confirm and
    # switch to a skill-aware model or keyword list if so.
    # dict.fromkeys() drops repeated mentions while keeping first-seen order.
    skills = list(dict.fromkeys(
        entity['word'] for entity in ner_results if entity.get('entity_group') == 'SKILL'
    ))
    return ", ".join(skills) if skills else "Not Available"
40
 
41
+ # Extract education details using NER model
42
  def extract_education(resume_text):
43
  ner_results = education_extractor(resume_text)
44
  education_entities = [entity['word'] for entity in ner_results if entity['entity_group'] == 'EDUCATION']
45
 
46
+ # If no direct education found, use regex patterns to capture education
47
  if education_entities:
48
  return ", ".join(education_entities)
49
  else:
 
50
  edu_patterns = [
51
+ r"(Bachelor of .+|Master of .+|PhD|BSc|MSc|MBA|B.A|M.A|B.Tech|M.Tech)",
52
  r"(University of [A-Za-z]+.*)"
53
  ]
54
  education = []
 
58
 
59
  return ", ".join(education) if education else "Not Available"
60
 
61
# Calculate match percentage between resume and job description
def calculate_match_percentage(resume_text, job_description):
    """Score how closely the resume matches the job description.

    Both texts are vectorised with TF-IDF (English stop words removed) and
    compared with cosine similarity.

    Args:
        resume_text: Plain text of the resume.
        job_description: Plain text of the job description.

    Returns:
        The cosine similarity scaled to a percentage, as a plain ``float``.
        ``0.0`` is returned when either text is missing or blank, or when
        the vectoriser cannot build a vocabulary from the two texts.
    """
    # A blank document cannot match anything; avoid handing it to the vectoriser.
    if not (resume_text and resume_text.strip() and job_description and job_description.strip()):
        return 0.0

    documents = [resume_text, job_description]
    tfidf_vectorizer = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = tfidf_vectorizer.fit_transform(documents)
    except ValueError:
        # fit_transform raises ValueError when the vocabulary ends up empty
        # (e.g. the texts contain only stop words); treat that as no overlap.
        return 0.0

    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    return float(cosine_sim[0][0] * 100)  # Return percentage match
68
 
69
# Generate resume summary using Gemini API
def generate_resume_summary(resume_text):
    """Ask the Gemini model for a five-bullet summary of the resume.

    Args:
        resume_text: Plain text of the resume, embedded verbatim in the prompt.

    Returns:
        The text of the model response with surrounding whitespace stripped.
    """
    summary_request = f"Summarize the resume below in 5 bullet points:\n\n{resume_text}"
    gemini = genai.GenerativeModel('gemini-1.5-flash')
    return gemini.generate_content(summary_request).text.strip()
75
 
76
# Main function to process and analyze the resume and job description
def analyze_resume_and_job(resume_pdf, job_description):
    """Run every extraction step on a resume and collect the results.

    Args:
        resume_pdf: The resume PDF, passed straight to ``input_pdf_text``.
        job_description: Job description text to compare the resume against.

    Returns:
        A single-row ``pandas.DataFrame`` with the columns ``Email``,
        ``Phone``, ``Skills``, ``Education``, ``Match_Percentage`` and
        ``Resume_Summary``.
    """
    text = input_pdf_text(resume_pdf)
    email_address, phone_number = extract_contact_info(text)

    # Dict values are evaluated top to bottom, so the helpers still run in the
    # order skills -> education -> match percentage -> Gemini summary.
    row = {
        'Email': email_address,
        'Phone': phone_number,
        'Skills': extract_skills(text),
        'Education': extract_education(text),
        'Match_Percentage': calculate_match_percentage(text, job_description),
        'Resume_Summary': generate_resume_summary(text),
    }
    return pd.DataFrame([row])
105
+
106
# Example usage
# Guarded so that importing this module does not run a full analysis
# (PDF read, model inference and a Gemini request) against placeholder inputs.
if __name__ == "__main__":
    resume_pdf = 'path_to_resume.pdf'  # Path to the uploaded resume PDF
    job_description = '''Insert job description here'''  # Job description text
    df = analyze_resume_and_job(resume_pdf, job_description)
    print(df)