DreamStream-1 committed on
Commit
3a9f7f8
1 Parent(s): 92726fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -107
app.py CHANGED
@@ -1,172 +1,162 @@
1
  import os
2
  import pandas as pd
3
  import google.generativeai as genai
4
- import PyPDF2 as pdf
5
  import io
6
  import re
7
  import streamlit as st
8
- from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline
9
  from sklearn.feature_extraction.text import TfidfVectorizer
10
  from sklearn.metrics.pairwise import cosine_similarity
11
 
12
- # Set API key for Google Generative AI
13
  api_key = os.getenv("GOOGLE_API_KEY")
14
  if not api_key:
15
  st.error("API key not found. Please set GOOGLE_API_KEY in your environment variables.")
16
  st.stop()
17
 
18
- # Initialize Google Generative AI
19
  genai.configure(api_key=api_key)
20
 
21
- # Function for text generation using Google Generative AI
22
  def generate_response(prompt, model="text-bison-001", max_output_tokens=256):
 
 
 
 
 
 
 
 
 
 
 
23
  try:
24
- # Use the correct method for generating text (may vary based on API update)
25
  response = genai.chat(
26
  model=model,
27
  messages=[{"role": "user", "content": prompt}],
28
- temperature=0.7, # You can adjust temperature for more creative responses
29
  max_output_tokens=max_output_tokens
30
  )
31
- return response.result['content'] # Correct response structure
32
  except Exception as e:
33
  return f"Error generating text: {str(e)}"
34
 
35
- # Extract text from uploaded PDF file
36
- def input_pdf_text(uploaded_file):
 
 
 
 
 
 
 
 
 
37
  try:
38
- file_stream = io.BytesIO(uploaded_file.read())
39
- reader = pdf.PdfReader(file_stream)
40
- text = ""
41
- for page in reader.pages:
42
- text += page.extract_text()
43
  return text.strip()
44
  except Exception as e:
45
  st.error(f"Error extracting text from PDF: {str(e)}")
46
  return ""
47
 
48
- # Extract email and phone numbers using regex
49
- def extract_contact_info(resume_text):
50
- email_regex = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
51
- phone_regex = r"\+?[\d\s().-]{7,15}"
52
 
53
- email_match = re.search(email_regex, resume_text)
54
- email = email_match.group(0) if email_match else "Not Available"
55
 
56
- contact_match = re.search(phone_regex, resume_text)
57
- contact = contact_match.group(0) if contact_match else "Not Available"
 
 
 
58
 
59
- return email, contact
 
60
 
61
- # Extract management and team leadership experience
62
  def extract_management_experience(text):
63
- management_keywords = [
64
- "manager", "team lead", "director", "executive", "head of", "supervisor", "leadership", "head"
65
- ]
66
- leadership_patterns = [
 
 
 
 
 
 
 
67
  r"(\d+)\s?(years|yrs|year)\s?of\s?(management|leadership)",
68
- r"(\d+)\s?(years|yrs|year)\s?experience\s?(managing|leading)",
69
- r"led\s?(\d+)\s?teams",
70
- r"(\d+)\s?team\s?(members|leaders)"
71
  ]
72
 
73
- management_years = 0
74
- leadership_experience = []
75
- for keyword in management_keywords:
76
- if keyword.lower() in text.lower():
77
- leadership_experience.append(keyword)
78
-
79
- for pattern in leadership_patterns:
80
- matches = re.findall(pattern, text)
81
- for match in matches:
82
- if len(match) == 2 and match[0].isdigit():
83
- management_years += int(match[0])
84
- elif len(match) == 1 and match[0].isdigit():
85
- management_years += int(match[0])
86
 
87
- management_experience = ', '.join(set(leadership_experience)) if leadership_experience else "Not Available"
88
- return management_years, management_experience
89
 
90
- # Calculate match percentage using TF-IDF and cosine similarity
91
  def calculate_match_percentage(resume_text, job_description):
92
- try:
93
- documents = [resume_text, job_description]
94
- tfidf_vectorizer = TfidfVectorizer(stop_words='english')
95
- tfidf_matrix = tfidf_vectorizer.fit_transform(documents)
96
- cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
97
- return round(cosine_sim[0][0] * 100, 2) if cosine_sim.any() else 0.0
98
- except Exception:
99
- return 0.0
100
 
101
- # Generate analysis from Google Generative AI
102
- def get_gemini_response(input_text, job_description):
 
103
  try:
104
- prompt = f"""
105
- Analyze the resume with respect to the job description.
106
- Resume Text: {input_text}
107
- Job Description: {job_description}
108
- Extract details:
109
- - Name
110
- - Skills
111
- - Education
112
- - Management and Team Leadership Experience (years)
113
- - Match percentage
114
- """
115
- return generate_response(prompt)
116
  except Exception as e:
117
- st.error(f"Error generating response from Google Generative AI: {str(e)}")
118
- return ""
119
 
120
- # Streamlit interface
121
- st.title("Resume ATS Analysis Tool - Management & Leadership Experience Focus")
122
- st.markdown("### Upload a Resume and Provide a Job Description")
123
 
124
  uploaded_file = st.file_uploader("Upload Resume PDF", type=["pdf"])
125
- job_description = st.text_area("Job Description (Required)", height=200)
126
 
127
  if uploaded_file and job_description.strip():
128
- if not uploaded_file.name.endswith('.pdf'):
129
- st.error("Only PDF files are supported.")
130
- st.stop()
131
-
132
- analyze_button = st.button("Analyze")
133
- if analyze_button:
134
- resume_text = input_pdf_text(uploaded_file)
135
-
136
  if not resume_text:
137
- st.error("No text found in the uploaded file.")
138
  st.stop()
139
 
140
- # Extract management and leadership experience
141
- management_years, management_experience = extract_management_experience(resume_text)
142
-
143
- # Generate analysis
144
- gemini_response = get_gemini_response(resume_text, job_description)
145
-
146
- # Extract data and calculate metrics
147
- email, contact = extract_contact_info(resume_text)
148
  match_percentage = calculate_match_percentage(resume_text, job_description)
149
 
150
- # Prepare results
 
 
 
 
 
 
 
151
  results = {
152
  "Email": email,
153
- "Contact": contact,
154
  "Management Experience (Years)": management_years,
155
- "Management & Leadership Keywords": management_experience,
156
  "Match Percentage": match_percentage,
157
- "Gemini Response Summary": gemini_response
158
  }
159
 
160
- # Display results
161
  st.write(pd.DataFrame([results]))
162
-
163
- # Enable CSV download
164
  csv = pd.DataFrame([results]).to_csv(index=False)
165
- st.download_button(
166
- label="Download Results as CSV",
167
- data=csv,
168
- file_name="resume_analysis_results.csv",
169
- mime="text/csv"
170
- )
171
  else:
172
- st.write("Please upload a resume and provide a job description.")
 
1
  import os
2
  import pandas as pd
3
  import google.generativeai as genai
4
+ import PyPDF2
5
  import io
6
  import re
7
  import streamlit as st
8
+ from transformers import pipeline
9
  from sklearn.feature_extraction.text import TfidfVectorizer
10
  from sklearn.metrics.pairwise import cosine_similarity
11
 
12
+ # Configure API Key
13
# The Google Generative AI key comes from the environment. Without it no
# request can be made, so the problem is reported in the UI and the script
# run is stopped before anything else is rendered.
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    st.error("API key not found. Please set GOOGLE_API_KEY in your environment variables.")
    st.stop()

# Hand the key to the client library for the requests made further down.
genai.configure(api_key=api_key)
19
 
20
+ # Text Generation Function
21
def generate_response(prompt, model="text-bison-001", max_output_tokens=256):
    """Generate a text completion for ``prompt`` with Google Generative AI.

    The default model is a text-completion model, so the request is sent
    through ``genai.generate_text``, which accepts ``max_output_tokens`` and
    exposes the generated text as ``response.result``.

    Args:
        prompt (str): Prompt text sent to the model.
        model (str): Model name, with or without the ``models/`` prefix.
        max_output_tokens (int): Upper bound on the number of generated tokens.

    Returns:
        str: The generated text, or a message starting with
        ``"Error generating text:"`` when the request fails or produces
        no text. This function never raises.
    """
    try:
        # NOTE(review): the client appears to require fully qualified names
        # such as "models/text-bison-001" - confirm against the installed
        # google-generativeai version.
        model_name = model if model.startswith("models/") else f"models/{model}"
        response = genai.generate_text(
            model=model_name,
            prompt=prompt,
            temperature=0.7,
            max_output_tokens=max_output_tokens,
        )
        text = response.result
        if not text:
            # No usable candidate came back (presumably filtered or empty);
            # report it instead of returning None to the caller.
            return "Error generating text: the model returned no content."
        return text
    except Exception as e:
        return f"Error generating text: {str(e)}"
43
 
44
+ # PDF Text Extraction
45
def extract_text_from_pdf(file):
    """Extract the text of every page of an uploaded PDF.

    Args:
        file (UploadedFile): PDF file uploaded via Streamlit; only its
            ``read()`` method is used.

    Returns:
        str: The page texts joined by newlines, with surrounding whitespace
        stripped. An empty string is returned (and the error is shown in the
        UI) when the file cannot be read or parsed.
    """
    try:
        reader = PyPDF2.PdfReader(io.BytesIO(file.read()))
        # A page without a text layer (e.g. a scanned image) may yield an
        # empty or missing result; treat it as empty text so the remaining
        # pages are still returned.
        page_texts = [page.extract_text() or "" for page in reader.pages]
        # Join with a newline so the last word of one page is not fused with
        # the first word of the next, which would break later regex matching.
        return "\n".join(page_texts).strip()
    except Exception as e:
        st.error(f"Error extracting text from PDF: {str(e)}")
        return ""
62
 
63
+ # Extract Contact Information
64
def extract_contact_info(text):
    """Extract the first email address and phone number found in ``text``.

    A phone candidate must start (after an optional ``+`` and ``(``) and end
    with a digit, stay on a single line, and contain between 7 and 15 digits.
    This rejects runs of blanks or punctuation and keeps long formatted
    numbers such as ``+1 (555) 123-4567`` intact. Other digit groups of that
    size (for example a ``2015 - 2019`` date range) can still be picked up.

    Args:
        text (str): Input text.

    Returns:
        tuple: ``(email, phone)``; each element is the matched string or
        ``"Not Available"`` when nothing suitable is found.
    """
    email_match = re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", text)
    email = email_match.group(0) if email_match else "Not Available"

    phone = "Not Available"
    # Only spaces and tabs are allowed as separators so that a number is not
    # merged with digits on the following line.
    for candidate in re.finditer(r"\+?\(?\d[\d \t().-]{5,20}\d", text):
        value = candidate.group(0)
        digit_count = sum(ch.isdigit() for ch in value)
        if 7 <= digit_count <= 15:
            phone = value
            break

    return email, phone
79
 
80
+ # Management Experience Extraction
81
def extract_management_experience(text):
    """Extract management/leadership keywords and stated years of experience.

    Keywords are matched case-insensitively as whole words (an optional
    plural ``s`` is accepted), so "ahead" or "headquarters" do not count as
    "head". Year phrases are also matched case-insensitively and tolerate any
    amount of whitespace, an optional ``+`` after the number, and the
    abbreviations ``yr``/``yrs``.

    Args:
        text (str): Input resume text.

    Returns:
        tuple: ``(years, keywords)`` where ``years`` is the sum of all numbers
        found in the year phrases and ``keywords`` is a comma-separated string
        of the matched keywords in list order, or ``"Not Available"`` when
        none is present.
    """
    keywords = ["manager", "team lead", "director", "executive", "supervisor", "leadership", "head"]
    patterns = [
        r"(\d+)\+?\s*(?:years?|yrs?)\s*of\s*(?:management|leadership)",
        r"(\d+)\+?\s*(?:years?|yrs?)\s*(?:of\s*)?experience\s*(?:in\s*)?(?:managing|leading)",
    ]

    lowered = text.lower()  # lower-case once instead of once per keyword
    found_keywords = []
    for kw in keywords:
        # Multi-word keywords may be separated by any whitespace in the PDF text.
        kw_pattern = r"\s+".join(re.escape(part) for part in kw.split())
        if re.search(rf"\b{kw_pattern}s?\b", lowered):
            found_keywords.append(kw)

    # Each pattern has a single capturing group, so findall yields the
    # number strings directly.
    years = sum(
        int(number)
        for pattern in patterns
        for number in re.findall(pattern, text, flags=re.IGNORECASE)
    )

    return years, ", ".join(found_keywords) if found_keywords else "Not Available"
 
101
 
102
+ # TF-IDF Match Percentage
103
def calculate_match_percentage(resume_text, job_description):
    """Calculate similarity between resume and job description using TF-IDF.

    Both texts are vectorised together (English stop words removed) and the
    cosine similarity of the two vectors is scaled to a percentage.

    Args:
        resume_text (str): Resume content.
        job_description (str): Job description.

    Returns:
        float: Match percentage in the range 0-100, rounded to two decimals.
        ``0.0`` is returned when either text is blank, and also (after
        showing the error in the UI) when vectorisation fails.
    """
    # With a blank document there is nothing to compare, so skip the
    # vectoriser entirely rather than relying on it to fail.
    if not (resume_text or "").strip() or not (job_description or "").strip():
        return 0.0

    try:
        vectorizer = TfidfVectorizer(stop_words='english')
        tfidf_matrix = vectorizer.fit_transform([resume_text, job_description])
        cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    except Exception as e:
        st.error(f"Error calculating match percentage: {str(e)}")
        return 0.0

    # Clamp so floating-point drift cannot push the value outside 0-100.
    score = float(cosine_sim[0][0]) * 100
    return round(min(max(score, 0.0), 100.0), 2)
122
 
123
+ # Streamlit Interface
124
st.title("Resume Analysis Tool: Management & Leadership Focus")
st.markdown("### Upload Resume PDF and Enter Job Description")

uploaded_file = st.file_uploader("Upload Resume PDF", type=["pdf"])
job_description = st.text_area("Job Description", height=200)

if not (uploaded_file and job_description.strip()):
    # Both inputs are required before the Analyze button is offered.
    st.info("Upload a resume and provide a job description to begin analysis.")
elif st.button("Analyze"):
    resume_text = extract_text_from_pdf(uploaded_file)
    if not resume_text:
        st.error("Failed to extract text from PDF. Ensure the file is valid.")
        st.stop()

    # Locally computed fields.
    email, phone = extract_contact_info(resume_text)
    management_years, management_keywords = extract_management_experience(resume_text)
    match_percentage = calculate_match_percentage(resume_text, job_description)

    # The prompt is assembled line by line so that no source-code indentation
    # ends up in the text sent to the model.
    prompt = "\n".join([
        "Analyze the resume with respect to the job description.",
        f"Resume Text: {resume_text}",
        f"Job Description: {job_description}",
        "Include: Name, Skills, Education, Experience, and Match Percentage.",
    ])
    gemini_response = generate_response(prompt)

    results = {
        "Email": email,
        "Contact": phone,
        "Management Experience (Years)": management_years,
        "Keywords": management_keywords,
        "Match Percentage": match_percentage,
        "AI Summary": gemini_response,
    }

    # One single-row frame serves both the on-screen table and the CSV export.
    results_df = pd.DataFrame([results])
    st.write(results_df)
    st.download_button(
        "Download Results",
        data=results_df.to_csv(index=False),
        file_name="resume_analysis.csv",
        mime="text/csv",
    )