DreamStream-1 committed on
Commit
d607db1
·
verified ·
1 Parent(s): 6ef0bb8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -12
app.py CHANGED
@@ -11,6 +11,9 @@ import PyPDF2
11
  from PIL import Image
12
  import pytesseract
13
  from pdf2image import convert_from_path
 
 
 
14
 
15
  # Download necessary NLTK data
16
  nltk.download('punkt')
@@ -54,6 +57,13 @@ def extract_text_with_ocr(pdf_file):
54
  text += pytesseract.image_to_string(image)
55
  return text
56
 
 
 
 
 
 
 
 
57
  # Unified PDF extraction function
58
  def extract_text_from_pdf(pdf_file):
59
  """Extract text using multiple methods."""
@@ -67,6 +77,9 @@ def extract_text_from_pdf(pdf_file):
67
  except Exception as e:
68
  print(f"Error with PyMuPDF: {e}")
69
 
 
 
 
70
  # Attempt PyPDF2 extraction
71
  try:
72
  text = extract_text_with_pypdf2(pdf_file)
@@ -75,6 +88,9 @@ def extract_text_from_pdf(pdf_file):
75
  except Exception as e:
76
  print(f"Error with PyPDF2: {e}")
77
 
 
 
 
78
  # Attempt OCR as a last resort
79
  try:
80
  text = extract_text_with_ocr(pdf_file)
@@ -134,6 +150,10 @@ def analyze_documents(resume_text, job_description):
134
  Resume: {resume_text}
135
  """
136
 
 
 
 
 
137
  url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key={API_KEY}"
138
  headers = {'Content-Type': 'application/json'}
139
  data = {
@@ -142,7 +162,19 @@ def analyze_documents(resume_text, job_description):
142
  ]
143
  }
144
  response = requests.post(url, headers=headers, json=data)
145
- return response.json()
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
  # Streamlit app configuration
148
  st.set_page_config(page_title="ATS Resume Evaluation System", layout="wide")
@@ -166,16 +198,28 @@ resume_file = st.file_uploader("Upload Resume (PDF or DOCX)", type=["pdf", "docx
166
  # Process the uploaded resume and job description
167
  if resume_file:
168
  if job_description:
169
- resume_text = resume_file.read() # Read the uploaded PDF or DOCX file
170
- resume_text = extract_text_from_pdf(resume_text) # Extract text from the PDF using the unified function
171
- cleaned_resume = clean_and_normalize_text(resume_text)
172
- cleaned_job_description = clean_and_normalize_text(job_description)
173
-
174
- # Analyze the resume and job description
175
- result = analyze_documents(cleaned_resume, cleaned_job_description)
176
-
177
- # Display the analysis results
178
- st.write(f"**Match Percentage**: {result['Match Percentage']}")
179
- st.write(f"**Recommendations**: {result['Recommendations']}")
 
 
 
 
 
 
 
 
 
 
 
 
180
  else:
181
  st.warning("Please enter the job description to begin analysis.")
 
11
  from PIL import Image
12
  import pytesseract
13
  from pdf2image import convert_from_path
14
+ import docx # For DOCX processing
15
+ import io
16
+ import os
17
 
18
  # Download necessary NLTK data
19
  nltk.download('punkt')
 
57
  text += pytesseract.image_to_string(image)
58
  return text
59
 
60
+ # Function for DOCX text extraction
61
+ def extract_text_from_docx(docx_file):
62
+ """Extract text from a DOCX file."""
63
+ doc = docx.Document(docx_file)
64
+ text = '\n'.join([para.text for para in doc.paragraphs])
65
+ return text
66
+
67
  # Unified PDF extraction function
68
  def extract_text_from_pdf(pdf_file):
69
  """Extract text using multiple methods."""
 
77
  except Exception as e:
78
  print(f"Error with PyMuPDF: {e}")
79
 
80
+ # Reset file pointer
81
+ pdf_file.seek(0)
82
+
83
  # Attempt PyPDF2 extraction
84
  try:
85
  text = extract_text_with_pypdf2(pdf_file)
 
88
  except Exception as e:
89
  print(f"Error with PyPDF2: {e}")
90
 
91
+ # Reset file pointer
92
+ pdf_file.seek(0)
93
+
94
  # Attempt OCR as a last resort
95
  try:
96
  text = extract_text_with_ocr(pdf_file)
 
150
  Resume: {resume_text}
151
  """
152
 
153
+ API_KEY = os.getenv("GEMINI_API_KEY") # Ensure you set this environment variable securely
154
+ if not API_KEY:
155
+ return {"Match Percentage": "API Key Missing", "Recommendations": "Please set the GEMINI_API_KEY environment variable."}
156
+
157
  url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key={API_KEY}"
158
  headers = {'Content-Type': 'application/json'}
159
  data = {
 
162
  ]
163
  }
164
  response = requests.post(url, headers=headers, json=data)
165
+
166
+ if response.status_code == 200:
167
+ try:
168
+ result = response.json()
169
+ # Parse the response as needed. This is a placeholder.
170
+ return {
171
+ "Match Percentage": result.get('choices', [{}])[0].get('text', 'N/A').strip(),
172
+ "Recommendations": "Placeholder for actual recommendations."
173
+ }
174
+ except ValueError:
175
+ return {"Match Percentage": "Error", "Recommendations": "Failed to parse response."}
176
+ else:
177
+ return {"Match Percentage": "Error", "Recommendations": f"API request failed with status code {response.status_code}."}
178
 
179
  # Streamlit app configuration
180
  st.set_page_config(page_title="ATS Resume Evaluation System", layout="wide")
 
198
  # Process the uploaded resume and job description
199
  if resume_file:
200
  if job_description:
201
+ try:
202
+ if resume_file.type == "application/pdf":
203
+ resume_text = extract_text_from_pdf(resume_file)
204
+ elif resume_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
205
+ resume_text = extract_text_from_docx(resume_file)
206
+ else:
207
+ st.error("Unsupported file type.")
208
+ resume_text = ""
209
+
210
+ if resume_text:
211
+ cleaned_resume = clean_and_normalize_text(resume_text)
212
+ cleaned_job_description = clean_and_normalize_text(job_description)
213
+
214
+ # Analyze the resume and job description
215
+ result = analyze_documents(cleaned_resume, cleaned_job_description)
216
+
217
+ # Display the analysis results
218
+ st.write(f"**Match Percentage**: {result.get('Match Percentage', 'N/A')}")
219
+ st.write(f"**Recommendations**: {result.get('Recommendations', 'N/A')}")
220
+ else:
221
+ st.error("Failed to extract text from the uploaded file.")
222
+ except Exception as e:
223
+ st.error(f"An error occurred during processing: {e}")
224
  else:
225
  st.warning("Please enter the job description to begin analysis.")