DreamStream-1 committed on
Commit
36492c8
·
verified ·
1 Parent(s): 1cf7d5f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -105
app.py CHANGED
@@ -1,182 +1,198 @@
1
  import os
2
  import pandas as pd
3
  import google.generativeai as genai
4
- import PyPDF2
5
  import io
6
  import re
7
  import streamlit as st
 
8
  from sklearn.feature_extraction.text import TfidfVectorizer
9
  from sklearn.metrics.pairwise import cosine_similarity
 
10
 
11
# Read the Gemini API key from the environment and stop the app early if it
# is missing, so no later call runs without credentials.
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    st.error("API key not found. Please set GOOGLE_API_KEY in your environment variables.")
    st.stop()

# Hand the key to the Generative AI SDK once, at start-up.
genai.configure(api_key=api_key)
19
 
20
# Generate Response using Gemini Flash 1.5
def generate_with_gemini(prompt, model="gemini-1p5", max_output_tokens=256):
    """
    Generate a response using the Gemini Flash 1.5 model.

    Requests go through ``genai.GenerativeModel(...).generate_content``
    rather than the legacy ``genai.generate_text`` text-completion call,
    which the earlier version of this function used.

    Args:
        prompt (str): Input prompt for the AI model.
        model (str): Model to use. The historical default "gemini-1p5" is
            kept so existing callers are unaffected; it is translated to
            "gemini-1.5-flash" before the request is made.
        max_output_tokens (int): Limit for the generated output tokens.

    Returns:
        str: Generated text, or a message starting with
        "Error generating text:" when the request fails.
    """
    # "gemini-1p5" is a placeholder spelling, not a served model identifier.
    model_id = "gemini-1.5-flash" if model == "gemini-1p5" else model
    try:
        client = genai.GenerativeModel(model_id)
        response = client.generate_content(
            prompt,
            generation_config={
                "temperature": 0.7,
                "max_output_tokens": max_output_tokens,
            },
        )
        # Accessing .text can itself raise (e.g. when no candidate text was
        # returned); that case is reported by the handler below.
        return response.text
    except Exception as e:
        return f"Error generating text: {str(e)}"
43
 
44
# Extract Text from Uploaded PDF
def extract_text_from_pdf(file):
    """
    Extract text from uploaded PDF file.

    Args:
        file (UploadedFile): PDF file uploaded via Streamlit.

    Returns:
        str: Extracted text with surrounding whitespace removed, or "" when
        extraction fails (the error is shown through ``st.error``).
    """
    try:
        # Rewind first so a file object that has already been read once
        # does not yield an empty byte string on a later call.
        if hasattr(file, "seek"):
            file.seek(0)
        reader = PyPDF2.PdfReader(io.BytesIO(file.read()))
        # `or ""` is a defensive guard for pages that yield no text.
        page_texts = [page.extract_text() or "" for page in reader.pages]
        # Separate pages with a newline: joining with "" fuses the last word
        # of one page with the first word of the next, which corrupts the
        # regex-based extractors downstream.
        text = "\n".join(page_texts)
        return text.strip()
    except Exception as e:
        st.error(f"Error extracting text from PDF: {str(e)}")
        return ""
62
 
63
# Extract Contact Information
def extract_contact_info(text):
    """
    Extract email and phone number from text using regex.

    A phone candidate must start and end on a digit and contain at least
    seven digits overall; this rejects runs of spaces, dots or dashes, which
    the former pattern ``\\+?[\\d\\s().-]{7,15}`` happily returned.

    Args:
        text (str): Input text.

    Returns:
        tuple: (email, phone); each element is "Not Available" when nothing
        suitable is found.
    """
    email = re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", text)

    phone_value = "Not Available"
    # Anchor both ends on a digit so separators alone can never match.
    for candidate in re.finditer(r"\+?\(?\d[\d\s().-]{5,13}\d", text):
        value = candidate.group(0).strip()
        if sum(ch.isdigit() for ch in value) >= 7:
            phone_value = value
            break

    return (email.group(0) if email else "Not Available", phone_value)
79
 
80
# Extract Management Experience
def extract_management_experience(text):
    """
    Extract management and leadership keywords and years.

    Year phrases are matched case-insensitively (so "5 Years of Management"
    counts) and keywords are matched as whole words with an optional plural
    "s" (so "head" no longer fires on "ahead" or "header").

    Args:
        text (str): Input resume text.

    Returns:
        tuple: (years, keywords) where ``years`` is the sum of every matched
        year count and ``keywords`` is a comma-separated string of the
        keywords found, or "Not Available" when none are present.
    """
    keywords = ["manager", "team lead", "director", "executive", "supervisor", "leadership", "head"]
    # A single capture group per pattern makes re.findall return the number
    # directly; "+" allows phrases such as "5+ years".
    patterns = [
        r"(\d+)\+?\s*(?:years|yrs|year)\s*of\s*(?:management|leadership)",
        r"(\d+)\+?\s*(?:years|yrs|year)\s*experience\s*(?:managing|leading)",
    ]

    lowered = text.lower()
    found_keywords = [
        kw for kw in keywords
        if re.search(rf"\b{re.escape(kw)}s?\b", lowered)
    ]
    years = sum(
        int(count)
        for pattern in patterns
        for count in re.findall(pattern, text, flags=re.IGNORECASE)
    )

    return years, ", ".join(found_keywords) if found_keywords else "Not Available"
 
 
 
101
 
102
# Calculate Match Percentage
def calculate_match_percentage(resume_text, job_description):
    """
    Score how closely a resume matches a job description.

    Both texts are vectorised together with TF-IDF (English stop words
    removed) and compared by cosine similarity.

    Args:
        resume_text (str): Resume content.
        job_description (str): Job description.

    Returns:
        float: Match percentage (0-100), rounded to two decimals; 0.0 when
        the computation raises (the error is shown through ``st.error``).
    """
    try:
        corpus = [resume_text, job_description]
        matrix = TfidfVectorizer(stop_words='english').fit_transform(corpus)
        resume_vec, job_vec = matrix[0:1], matrix[1:2]
        similarity = cosine_similarity(resume_vec, job_vec)
        return round(similarity[0][0] * 100, 2)
    except Exception as e:
        st.error(f"Error calculating match percentage: {str(e)}")
        return 0.0
122
 
123
# Streamlit User Interface
st.title("Resume ATS Analysis Tool: Powered by Gemini Flash 1.5")
st.markdown("### Upload a Resume PDF and Enter a Job Description")

uploaded_file = st.file_uploader("Upload Resume PDF", type=["pdf"])
job_description = st.text_area("Job Description", height=200)

if not (uploaded_file and job_description.strip()):
    # Both inputs are required before the Analyze button is offered.
    st.info("Please upload a resume and enter a job description to proceed.")
elif st.button("Analyze"):
    # Pull the raw text out of the PDF; nothing else can run without it.
    resume_text = extract_text_from_pdf(uploaded_file)
    if not resume_text:
        st.error("Failed to extract text from PDF. Ensure the file is valid.")
        st.stop()

    # Rule-based extraction and similarity scoring.
    email, phone = extract_contact_info(resume_text)
    management_years, management_keywords = extract_management_experience(resume_text)
    match_percentage = calculate_match_percentage(resume_text, job_description)

    # Free-form analysis from the language model.
    prompt = f"""
        Analyze the resume with respect to the job description.
        Resume Text: {resume_text}
        Job Description: {job_description}
        Provide details:
        - Key Skills
        - Education
        - Management Experience (Years)
        - Leadership Keywords
        - Match Percentage
        """
    gemini_response = generate_with_gemini(prompt)

    # One-row table of everything gathered above.
    results = {
        "Email": email,
        "Contact": phone,
        "Management Experience (Years)": management_years,
        "Leadership Keywords": management_keywords,
        "Match Percentage": match_percentage,
        "AI Summary": gemini_response
    }
    results_frame = pd.DataFrame([results])
    st.write(results_frame)

    # Offer the same table as a CSV download.
    csv = results_frame.to_csv(index=False)
    st.download_button(
        "Download Results",
        data=csv,
        file_name="resume_analysis_results.csv",
        mime="text/csv"
    )
 
1
  import os
2
  import pandas as pd
3
  import google.generativeai as genai
4
+ import PyPDF2 as pdf
5
  import io
6
  import re
7
  import streamlit as st
8
+ from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
9
  from sklearn.feature_extraction.text import TfidfVectorizer
10
  from sklearn.metrics.pairwise import cosine_similarity
11
+ import torch
12
 
13
# Read the Google Generative AI key from the environment; fail fast with a
# ValueError when it is absent so nothing below runs unauthenticated.
api_key = os.environ.get('GOOGLE_API_KEY')
if not api_key:
    raise ValueError("API key not found. Please set GOOGLE_API_KEY as an environment variable.")

# Register the key with the generative AI client.
genai.configure(api_key=api_key)
20
 
21
# Load Hugging Face pipelines and models
# Streamlit re-executes this script on every widget interaction, so model
# construction is wrapped in st.cache_resource to build each model once per
# server process instead of once per rerun.
@st.cache_resource(show_spinner=False)
def _load_ner_pipeline(model_id):
    """Build a token-classification pipeline for ``model_id`` with grouped entities."""
    return pipeline("ner", model=model_id, aggregation_strategy="simple")


@st.cache_resource(show_spinner=False)
def _load_sequence_classifier(checkpoint):
    """Return the (tokenizer, model) pair for a sequence-classification checkpoint."""
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSequenceClassification.from_pretrained(checkpoint),
    )


skill_extractor = _load_ner_pipeline("dslim/bert-base-NER")
education_extractor = _load_ner_pipeline("dbmdz/bert-large-cased-finetuned-conll03-english")

# Sentiment analysis using Hugging Face RoBERTa
# NOTE(review): "roberta-base" is a base checkpoint, so its classification
# head is presumably untrained, and nothing visible in this file uses
# `tokenizer` / `model` yet - confirm whether this load is still needed.
task = "sentiment-analysis"
model_name = "roberta-base"
tokenizer, model = _load_sequence_classifier(model_name)
 
 
 
 
 
 
 
 
30
 
31
# Function to extract text from uploaded PDF
def extract_pdf_text(uploaded_file):
    """
    Extract text from the uploaded PDF file.

    Args:
        uploaded_file: Streamlit uploaded file object.

    Returns:
        str: Extracted text content with surrounding whitespace removed, or
        "" when extraction fails (the error is shown through ``st.error``).
    """
    try:
        # Rewind first so an upload object that was already read once does
        # not produce an empty byte string on a later call.
        if hasattr(uploaded_file, "seek"):
            uploaded_file.seek(0)
        file_stream = io.BytesIO(uploaded_file.read())
        reader = pdf.PdfReader(file_stream)
        # `or ""` is a defensive guard for pages that yield no text.
        page_texts = [page.extract_text() or "" for page in reader.pages]
        # Keep a newline between pages: joining with "" fuses the last word
        # of one page with the first word of the next and can corrupt the
        # email / phone / education matches that run on this text.
        text = "\n".join(page_texts)
        return text.strip()
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
        return ""
49
 
50
# Function to extract email and phone numbers
def extract_contact_info(text):
    """
    Extract email and phone number using regex.

    The phone pattern is guarded on both sides so it cannot match a slice of
    a longer digit run (account numbers, IDs, timestamps).

    Args:
        text: Extracted text content from the resume.

    Returns:
        tuple: (email, phone); each element is "Not Available" when no match
        is found.
    """
    email = re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", text)
    # (?<!\d) / (?!\d): the match may neither start nor stop in the middle
    # of a sequence of digits.
    phone = re.search(
        r"(?<!\d)\+?\(?\d{1,3}\)?[-.\s]?\(?\d{1,4}\)?[-.\s]?\d{3}[-.\s]?\d{4}(?!\d)",
        text,
    )
    return (email.group(0) if email else "Not Available",
            phone.group(0) if phone else "Not Available")
64
 
65
# Function to extract skills using NER
def extract_skills(text, labels=("SKILL", "MISC"), max_words=150):
    """
    Extract skills from resume text using NER.

    NOTE(review): the configured checkpoint (dslim/bert-base-NER) is a
    CoNLL-2003 style tagger whose entity groups are PER / ORG / LOC / MISC,
    so filtering on 'SKILL' alone can never match. 'MISC' is accepted as the
    closest available proxy (product and technology names tend to land
    there); swap in a skill-specific model and adjust ``labels`` for better
    results.

    Args:
        text: Resume text.
        labels: Entity groups to keep. 'SKILL' stays in the default so a
            skill-aware model works without further changes.
        max_words: Number of whitespace-separated words sent to the model
            per call. BERT checkpoints have a bounded input window (512
            tokens), so long resumes are processed in slices instead of
            being cut off after the first window.

    Returns:
        str: Comma-separated skills (duplicates removed, first-seen order)
        or "Not Available".
    """
    words = text.split()
    chunks = [
        " ".join(words[start:start + max_words])
        for start in range(0, len(words), max_words)
    ]

    skills = []
    for chunk in chunks:
        for entity in skill_extractor(chunk):
            if entity['entity_group'] in labels:
                skills.append(entity['word'].strip())

    # dict.fromkeys removes repeats while preserving order of appearance.
    unique_skills = list(dict.fromkeys(skill for skill in skills if skill))
    return ", ".join(unique_skills) if unique_skills else "Not Available"
 
 
 
 
 
78
 
79
# Function to extract education details
def extract_education(text):
    """
    Extract education information using NER and regex.

    The NER pass is best-effort: if the model call fails for any reason the
    function falls back to the regex patterns instead of aborting.

    NOTE(review): the configured checkpoint is a CoNLL-03 model, which
    presumably never emits an 'EDUCATION' group, so in practice the regex
    fallback does the work - confirm and consider dropping the NER call.

    Args:
        text: Resume text.

    Returns:
        str: Comma-separated education details (duplicates removed,
        first-seen order) or "Not Available".
    """
    import logging

    try:
        ner_results = education_extractor(text)
    except Exception as exc:
        # Model inference is optional here; record the failure and continue
        # with the pattern-based extraction below.
        logging.getLogger(__name__).warning(
            "Education NER failed; using regex fallback: %s", exc
        )
        ner_results = []

    education_entities = [
        entity['word'] for entity in ner_results
        if entity['entity_group'] == 'EDUCATION'
    ]
    if education_entities:
        return ", ".join(education_entities)

    # Dots in abbreviations are escaped and every alternative is bounded by
    # \b; previously "M.A" matched the "MMA" inside "SUMMARY".
    education_patterns = [
        r"\b(Bachelor of .+|Master of .+|PhD|BSc|MSc|MBA|B\.A|M\.A|B\.Tech|M\.Tech|Engineering|Data Science)\b",
        r"\b(University of [A-Za-z]+)",
    ]
    matches = []
    for pattern in education_patterns:
        matches.extend(re.findall(pattern, text))

    unique_matches = list(dict.fromkeys(matches))
    return ", ".join(unique_matches) if unique_matches else "Not Available"
103
+
104
# Function to calculate match percentage using TF-IDF
def calculate_match_percentage(resume_text, job_description):
    """
    Calculate the match percentage using TF-IDF and cosine similarity.

    Args:
        resume_text: Resume text.
        job_description: Job description.

    Returns:
        float: Match percentage (0-100), rounded to two decimals. 0.0 is
        returned when either text is blank or when no usable vocabulary
        remains after stop-word removal.
    """
    documents = [resume_text, job_description]
    # Nothing to compare when one side is empty or whitespace only.
    if not all(doc and doc.strip() for doc in documents):
        return 0.0

    tfidf_vectorizer = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = tfidf_vectorizer.fit_transform(documents)
    except ValueError:
        # Raised by scikit-learn when the vocabulary ends up empty, e.g. the
        # documents consist solely of stop words or punctuation.
        return 0.0

    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    # Convert the numpy scalar to a plain float, as the docstring promises.
    return round(float(cosine_sim[0][0]) * 100, 2)
120
+
121
# Function to analyze resume with Gemini Flash 1.5
def analyze_with_gemini(resume_text, job_description):
    """
    Use Gemini Flash 1.5 to generate an ATS analysis.

    The request is made through ``genai.GenerativeModel(...).generate_content``
    with the "gemini-1.5-flash" model; the earlier code used the legacy
    ``genai.generate_text`` call with the identifier "gemini-1p5".

    Args:
        resume_text: Text content of the resume.
        job_description: Job description content.

    Returns:
        str: AI-generated analysis, or a message starting with
        "Error generating analysis:" when the request fails.
    """
    prompt = f"""
    Act as an advanced ATS. Analyze the resume and job description.
    Resume: {resume_text}
    Job Description: {job_description}
    Extract:
    - Candidate Name
    - Skills
    - Education
    - Leadership Experience (years)
    - Match Percentage
    Provide a summary of the candidate's strengths in bullet points.
    """
    try:
        gemini = genai.GenerativeModel("gemini-1.5-flash")
        response = gemini.generate_content(
            prompt,
            generation_config={
                "temperature": 0.7,
                "max_output_tokens": 500,
            },
        )
        # Accessing .text can itself raise (e.g. when no candidate text was
        # returned); that case is reported by the handler below.
        return response.text
    except Exception as e:
        return f"Error generating analysis: {e}"
 
154
 
155
# Streamlit Interface
st.title("Resume ATS Analysis Tool")
st.markdown("### Upload Resume PDF and Enter Job Description for Analysis")

uploaded_file = st.file_uploader("Upload Resume (PDF format)", type=["pdf"])
job_description = st.text_area("Job Description", height=200)

if not (uploaded_file and job_description.strip()):
    # Both inputs are required before the Analyze button is offered.
    st.info("Upload a resume and provide a job description to start the analysis.")
elif st.button("Analyze"):
    resume_text = extract_pdf_text(uploaded_file)
    if not resume_text:
        st.error("No text extracted from PDF. Please upload a valid file.")
        st.stop()

    # Gather every candidate detail from the extracted text.
    email, phone = extract_contact_info(resume_text)
    skills = extract_skills(resume_text)
    education = extract_education(resume_text)
    match_percentage = calculate_match_percentage(resume_text, job_description)
    gemini_analysis = analyze_with_gemini(resume_text, job_description)

    # Collect the findings into a single-row table.
    results = {
        "Email": email,
        "Contact": phone,
        "Skills": skills,
        "Education": education,
        "Match Percentage": match_percentage,
        "Gemini Analysis": gemini_analysis
    }
    results_frame = pd.DataFrame([results])

    # Show the table in the page.
    st.write(results_frame)

    # Offer the same table as a CSV download.
    csv = results_frame.to_csv(index=False)
    st.download_button(
        label="Download Results as CSV",
        data=csv,
        file_name="resume_analysis_results.csv",
        mime="text/csv"
    )