CDF-HR / app.py
DreamStream-1's picture
Update app.py
c28a311 verified
import html
import re

import pandas as pd
import requests
import streamlit as st
from docx import Document
from PyPDF2 import PdfReader
# API key for the Google Generative Language API. It is looked up once, at
# module load, from Streamlit's secrets store under the "GOOGLE_API_KEY" entry.
API_KEY = st.secrets["GOOGLE_API_KEY"]
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Whatever ``PdfReader`` accepts as its source argument.

    Returns:
        A single string made of each page's extracted text, with no
        separator inserted between pages.
    """
    pdf = PdfReader(pdf_file)
    return "".join(page.extract_text() for page in pdf.pages)
def extract_text_from_docx(docx_file):
    """Return the text of every paragraph of a DOCX file.

    Args:
        docx_file: Whatever ``Document`` accepts as its source argument.

    Returns:
        A single string in which each paragraph's text is followed by a
        newline character.
    """
    document = Document(docx_file)
    return "".join(f"{paragraph.text}\n" for paragraph in document.paragraphs)
def extract_contact_info(resume_text):
    """Extract a candidate's name, email and phone number from resume text.

    The email and phone number are the first regex matches in the text. The
    name is a heuristic: the first non-blank line of the resume, stripped of
    surrounding whitespace.

    Args:
        resume_text: Plain text of the resume. ``None`` or an empty string is
            treated as "no text".

    Returns:
        A dict with the keys ``"name"``, ``"email"`` and ``"phone"``. A field
        that cannot be found holds ``"Name Not Found"``, ``"Email Not Found"``
        or ``"Contact Not Found"`` respectively.
    """
    # Regex patterns for email and phone. The phone pattern accepts an
    # optional leading "+", then 10-14 digits in total where the inner
    # characters may also be spaces or hyphens.
    email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    phone_pattern = r'\+?\d[\d -]{8,12}\d'

    # Normalise None to "" so the regex searches below cannot raise.
    text = resume_text or ""

    email = re.search(email_pattern, text)
    phone = re.search(phone_pattern, text)

    # Extracted text frequently starts with blank lines, so take the first
    # line that actually has content rather than the first physical line.
    stripped_lines = (line.strip() for line in text.splitlines())
    name = next((line for line in stripped_lines if line), "Name Not Found")

    return {
        "name": name,
        "email": email.group() if email else "Email Not Found",
        "phone": phone.group() if phone else "Contact Not Found",
    }
def analyze_documents(resume_text, job_description):
    """Ask the Gemini API to score a resume against a job description.

    Builds a fixed ATS-style prompt containing both texts and posts it to the
    ``generateContent`` endpoint.

    Args:
        resume_text: Plain text of the resume.
        job_description: Plain text of the job description.

    Returns:
        The decoded JSON body of the API response as returned by the service.
        If the request cannot be completed (network failure, timeout) or the
        body is not valid JSON, a dict of the form
        ``{"error": {"message": "..."}}`` is returned instead of raising, so
        callers that look for a ``"candidates"`` key keep working.
    """
    custom_prompt = f"""
Please analyze the following resume in the context of the job description provided. Strictly check every single line in the job description and analyze my resume whether there is a match exactly. Strictly maintain high ATS standards and give scores only to the correct ones. Focus on hard skills which are missing and also soft skills which are missing. Provide the following details.:
1. The match percentage of the resume to the job description. Display this.
2. A list of missing keywords accurate ones.
3. Final thoughts on the resume's overall match with the job description in 3 lines.
4. Recommendations on how to add the missing keywords and improve the resume in 3-4 points with examples.
Please display in the above order don't mention the numbers like 1. 2. etc and strictly follow ATS standards so that analysis will be accurate. Strictly follow the above templates omg. don't keep changing every time.
Strictly follow the above things and template which has to be displayed and don't keep changing again and again. Don't fucking change the template from above.
Title should be Resume analysis and maintain the same title for all. Also if someone uploads the same unchanged resume twice, keep in mind to give the same results. Display new ones only if they have changed their resume according to your suggestions or at least few changes.
Job Description: {job_description}
Resume: {resume_text}
"""
    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
    # The key travels in a header rather than the query string so that it does
    # not end up in URLs echoed by logs or exception messages.
    headers = {
        'Content-Type': 'application/json',
        'x-goog-api-key': API_KEY,
    }
    data = {
        "contents": [
            {"role": "user", "parts": [{"text": custom_prompt}]}
        ]
    }

    try:
        # Without a timeout a stalled connection would block the app forever.
        response = requests.post(url, headers=headers, json=data, timeout=60)
    except requests.RequestException as exc:
        return {"error": {"message": f"Request to the Gemini API failed: {exc}"}}

    try:
        # Error responses are deliberately not raised on: their JSON body is
        # handed back to the caller unchanged.
        return response.json()
    except ValueError:
        return {
            "error": {
                "message": (
                    "Gemini API returned a non-JSON response "
                    f"(HTTP {response.status_code})."
                )
            }
        }
def display_resume(file, index):
    """Display the parsed content of an uploaded resume in a text area.

    The extraction itself is delegated to ``extract_text_from_pdf`` and
    ``extract_text_from_docx`` so the preview shows the same text that the
    analysis step works on.

    Args:
        file: Uploaded file object; its ``name`` attribute supplies the
            extension used to pick the extractor and the label of the widget.
        index: Position of the file in the upload list. It is appended to the
            file name to build a widget key that stays unique even when two
            uploads share a name.
    """
    extractors = {
        'pdf': extract_text_from_pdf,
        'docx': extract_text_from_docx,
    }
    file_type = file.name.split('.')[-1].lower()
    extractor = extractors.get(file_type)
    if extractor is None:
        st.error("Unsupported file type. Please upload a PDF or DOCX file.")
        return

    unique_key = f"{file.name}_{index}"  # Ensure the key is unique by appending an index
    st.text_area(
        f"Parsed Resume Content - {file.name}",
        extractor(file),
        height=400,
        key=unique_key,
    )
# Streamlit page configuration for better layout
st.set_page_config(page_title="ATS Resume Evaluation System", layout="wide")

# Custom styles for the result cards: `.highlight` is the card container and
# `.highlight-bold` styles the field labels inside it.
st.markdown("""
<style>
.highlight {
background-color: #f9f9f9;
border-left: 6px solid #4CAF50;
padding: 10px;
margin-bottom: 10px;
font-size: 16px;
}
.highlight-bold {
font-weight: bold;
color: #4CAF50;
}
</style>
""", unsafe_allow_html=True)

# Main Title (emoji restored from mis-decoded UTF-8 bytes)
st.markdown('<h1 style="text-align: center; color: #4CAF50;">📄🔍 Resume Evaluation System 📊</h1>', unsafe_allow_html=True)

# Job Description Input
job_description = st.text_area("Job Description:", height=200, placeholder="Paste the job description here...")

# Resume Upload
resumes = st.file_uploader("Upload Your Resumes (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)

highlighted_data = []  # List to store the highlighted results

# Preview every uploaded resume; the index keeps the widget keys unique.
if resumes:
    st.write("Uploaded Resumes:")
    for index, resume in enumerate(resumes):
        st.write(f"📄 {resume.name}")
        display_resume(resume, index)
def _extract_match_percentage(analysis):
    """Pull the match percentage (e.g. ``"85%"``) out of a Gemini response.

    Every text part of every candidate is inspected; a part is only considered
    when it mentions "match percentage" (case-insensitive), and the first
    percentage figure in that part is used. If several parts qualify, the last
    one wins. Returns ``"Not Found"`` when nothing usable is present.
    """
    match_percentage = "Not Found"
    if not isinstance(analysis, dict):
        return match_percentage
    for candidate in analysis.get("candidates") or []:
        parts = (candidate.get("content") or {}).get("parts") or []
        for part in parts:
            # Parts without a "text" field are skipped instead of raising.
            response_text = part.get("text", "")
            if "match percentage" in response_text.lower():
                # The optional fractional group keeps "72.5%" from being
                # truncated to "5%".
                found = re.search(r'\d{1,3}(?:\.\d+)?%', response_text)
                match_percentage = found.group(0) if found else "Not Found"
    return match_percentage


# Analysis Button
if st.button("Analyze Resumes"):
    if not (job_description and resumes):
        st.error("Please provide the job description and upload resumes.")
    elif len(resumes) > 10:
        st.error("You can upload a maximum of 10 resumes.")
    else:
        for resume in resumes:
            resume.seek(0)  # Reset file pointer (the preview already read the file)
            file_type = resume.name.split('.')[-1].lower()
            if file_type == 'pdf':
                resume_text = extract_text_from_pdf(resume)
            elif file_type == 'docx':
                resume_text = extract_text_from_docx(resume)
            else:
                # Without this branch `resume_text` would be undefined, or
                # silently reuse the previous resume's text.
                st.error("Unsupported file type. Please upload a PDF or DOCX file.")
                continue

            contact_info = extract_contact_info(resume_text)
            analysis = analyze_documents(resume_text, job_description)

            # Surface API failures instead of silently reporting "Not Found".
            if isinstance(analysis, dict) and "error" in analysis:
                api_error = analysis["error"]
                if isinstance(api_error, dict):
                    api_error = api_error.get("message", api_error)
                st.warning(f"Analysis failed for {resume.name}: {api_error}")

            # Extract match percentage
            match_percentage = _extract_match_percentage(analysis)

            # Add the data to the list for CSV export and table
            highlighted_data.append({
                "Name": contact_info["name"],
                "Email": contact_info["email"],
                "Contact": contact_info["phone"],
                "Match Percentage": match_percentage
            })

            # The contact fields come straight from the uploaded document, so
            # they are escaped before being embedded in raw HTML.
            safe_name = html.escape(contact_info['name'])
            safe_email = html.escape(contact_info['email'])
            safe_phone = html.escape(contact_info['phone'])
            safe_percentage = html.escape(match_percentage)

            # Display contact info and match percentage
            st.markdown(f"""
<div class="highlight">
<p><span class="highlight-bold">Name:</span> {safe_name}</p>
<p><span class="highlight-bold">Email:</span> {safe_email}</p>
<p><span class="highlight-bold">Contact:</span> {safe_phone}</p>
<p><span class="highlight-bold">Match Percentage:</span> <span style="color: green; font-weight: bold;">{safe_percentage}</span></p>
</div>
""", unsafe_allow_html=True)

        # Display results in a table
        if highlighted_data:
            df = pd.DataFrame(highlighted_data)
            st.write("### Highlighted Results")
            st.table(df)

            # Provide CSV download button
            csv = df.to_csv(index=False)
            st.download_button(
                label="Download Results as CSV",
                data=csv,
                file_name="resume_analysis_results.csv",
                mime="text/csv"
            )

        st.success("Analysis Complete!")