Spaces:
Sleeping
Sleeping
DreamStream-1
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -11,6 +11,9 @@ import PyPDF2
|
|
11 |
from PIL import Image
|
12 |
import pytesseract
|
13 |
from pdf2image import convert_from_path
|
|
|
|
|
|
|
14 |
|
15 |
# Download necessary NLTK data
|
16 |
nltk.download('punkt')
|
@@ -54,6 +57,13 @@ def extract_text_with_ocr(pdf_file):
|
|
54 |
text += pytesseract.image_to_string(image)
|
55 |
return text
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
# Unified PDF extraction function
|
58 |
def extract_text_from_pdf(pdf_file):
|
59 |
"""Extract text using multiple methods."""
|
@@ -67,6 +77,9 @@ def extract_text_from_pdf(pdf_file):
|
|
67 |
except Exception as e:
|
68 |
print(f"Error with PyMuPDF: {e}")
|
69 |
|
|
|
|
|
|
|
70 |
# Attempt PyPDF2 extraction
|
71 |
try:
|
72 |
text = extract_text_with_pypdf2(pdf_file)
|
@@ -75,6 +88,9 @@ def extract_text_from_pdf(pdf_file):
|
|
75 |
except Exception as e:
|
76 |
print(f"Error with PyPDF2: {e}")
|
77 |
|
|
|
|
|
|
|
78 |
# Attempt OCR as a last resort
|
79 |
try:
|
80 |
text = extract_text_with_ocr(pdf_file)
|
@@ -134,6 +150,10 @@ def analyze_documents(resume_text, job_description):
|
|
134 |
Resume: {resume_text}
|
135 |
"""
|
136 |
|
|
|
|
|
|
|
|
|
137 |
url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key={API_KEY}"
|
138 |
headers = {'Content-Type': 'application/json'}
|
139 |
data = {
|
@@ -142,7 +162,19 @@ def analyze_documents(resume_text, job_description):
|
|
142 |
]
|
143 |
}
|
144 |
response = requests.post(url, headers=headers, json=data)
|
145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
|
147 |
# Streamlit app configuration
|
148 |
st.set_page_config(page_title="ATS Resume Evaluation System", layout="wide")
|
@@ -166,16 +198,28 @@ resume_file = st.file_uploader("Upload Resume (PDF or DOCX)", type=["pdf", "docx
|
|
166 |
# Process the uploaded resume and job description
|
167 |
if resume_file:
|
168 |
if job_description:
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
else:
|
181 |
st.warning("Please enter the job description to begin analysis.")
|
|
|
11 |
from PIL import Image
|
12 |
import pytesseract
|
13 |
from pdf2image import convert_from_path
|
14 |
+
import docx # For DOCX processing
|
15 |
+
import io
|
16 |
+
import os
|
17 |
|
18 |
# Download necessary NLTK data
|
19 |
nltk.download('punkt')
|
|
|
57 |
text += pytesseract.image_to_string(image)
|
58 |
return text
|
59 |
|
60 |
+
# Function for DOCX text extraction
|
61 |
+
def extract_text_from_docx(docx_file):
    """Extract text from a DOCX file.

    Collects the text of the document's body paragraphs and then the text
    of every table cell, so content that is laid out in tables is not lost.
    For a document without tables the result is just the paragraph text
    joined by newlines.

    Args:
        docx_file: Whatever ``docx.Document`` accepts (here it is called
            with the uploaded file object).

    Returns:
        str: Paragraph text first, followed by non-empty table cell text,
        all joined with newlines.
    """
    doc = docx.Document(docx_file)
    lines = [para.text for para in doc.paragraphs]

    # ``doc.paragraphs`` only covers body paragraphs; text placed inside
    # tables has to be collected separately.
    for table in doc.tables:
        for row in table.rows:
            previous_text = None
            for cell in row.cells:
                cell_text = cell.text.strip()
                # Skip empty cells and immediate repeats within a row:
                # python-docx can report a horizontally merged cell once
                # per grid column it spans.
                if cell_text and cell_text != previous_text:
                    lines.append(cell_text)
                previous_text = cell_text

    return '\n'.join(lines)
|
66 |
+
|
67 |
# Unified PDF extraction function
|
68 |
def extract_text_from_pdf(pdf_file):
|
69 |
"""Extract text using multiple methods."""
|
|
|
77 |
except Exception as e:
|
78 |
print(f"Error with PyMuPDF: {e}")
|
79 |
|
80 |
+
# Reset file pointer
|
81 |
+
pdf_file.seek(0)
|
82 |
+
|
83 |
# Attempt PyPDF2 extraction
|
84 |
try:
|
85 |
text = extract_text_with_pypdf2(pdf_file)
|
|
|
88 |
except Exception as e:
|
89 |
print(f"Error with PyPDF2: {e}")
|
90 |
|
91 |
+
# Reset file pointer
|
92 |
+
pdf_file.seek(0)
|
93 |
+
|
94 |
# Attempt OCR as a last resort
|
95 |
try:
|
96 |
text = extract_text_with_ocr(pdf_file)
|
|
|
150 |
Resume: {resume_text}
|
151 |
"""
|
152 |
|
153 |
+
API_KEY = os.getenv("GEMINI_API_KEY") # Ensure you set this environment variable securely
|
154 |
+
if not API_KEY:
|
155 |
+
return {"Match Percentage": "API Key Missing", "Recommendations": "Please set the GEMINI_API_KEY environment variable."}
|
156 |
+
|
157 |
url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key={API_KEY}"
|
158 |
headers = {'Content-Type': 'application/json'}
|
159 |
data = {
|
|
|
162 |
]
|
163 |
}
|
164 |
response = requests.post(url, headers=headers, json=data)
|
165 |
+
|
166 |
+
if response.status_code == 200:
|
167 |
+
try:
|
168 |
+
result = response.json()
|
169 |
+
# Parse the response as needed. This is a placeholder.
|
170 |
+
return {
|
171 |
+
"Match Percentage": result.get('choices', [{}])[0].get('text', 'N/A').strip(),
|
172 |
+
"Recommendations": "Placeholder for actual recommendations."
|
173 |
+
}
|
174 |
+
except ValueError:
|
175 |
+
return {"Match Percentage": "Error", "Recommendations": "Failed to parse response."}
|
176 |
+
else:
|
177 |
+
return {"Match Percentage": "Error", "Recommendations": f"API request failed with status code {response.status_code}."}
|
178 |
|
179 |
# Streamlit app configuration
|
180 |
st.set_page_config(page_title="ATS Resume Evaluation System", layout="wide")
|
|
|
198 |
# Process the uploaded resume and job description
|
199 |
if resume_file:
|
200 |
if job_description:
|
201 |
+
try:
|
202 |
+
if resume_file.type == "application/pdf":
|
203 |
+
resume_text = extract_text_from_pdf(resume_file)
|
204 |
+
elif resume_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
|
205 |
+
resume_text = extract_text_from_docx(resume_file)
|
206 |
+
else:
|
207 |
+
st.error("Unsupported file type.")
|
208 |
+
resume_text = ""
|
209 |
+
|
210 |
+
if resume_text:
|
211 |
+
cleaned_resume = clean_and_normalize_text(resume_text)
|
212 |
+
cleaned_job_description = clean_and_normalize_text(job_description)
|
213 |
+
|
214 |
+
# Analyze the resume and job description
|
215 |
+
result = analyze_documents(cleaned_resume, cleaned_job_description)
|
216 |
+
|
217 |
+
# Display the analysis results
|
218 |
+
st.write(f"**Match Percentage**: {result.get('Match Percentage', 'N/A')}")
|
219 |
+
st.write(f"**Recommendations**: {result.get('Recommendations', 'N/A')}")
|
220 |
+
else:
|
221 |
+
st.error("Failed to extract text from the uploaded file.")
|
222 |
+
except Exception as e:
|
223 |
+
st.error(f"An error occurred during processing: {e}")
|
224 |
else:
|
225 |
st.warning("Please enter the job description to begin analysis.")
|