AI_Resume_helper / backend.py
kedar-bhumkar's picture
Upload 19 files
36652ef verified
import os
import docx2txt
import docx
from docx import Document
import openai
from dotenv import load_dotenv
import json
import tempfile
import re
# Add these imports for PDF support
import PyPDF2
import io
# Load environment variables (python-dotenv) before the os.getenv lookups below.
load_dotenv()
# Seed the openai module's API key from the OPENAI_API_KEY environment
# variable; os.getenv yields None when the variable is unset.
openai.api_key = os.getenv("OPENAI_API_KEY")
# Default model name for chat-completion requests.
DEFAULT_MODEL = "gpt-4o"
# Model currently in use. Reassigned by set_openai_model() and read by the
# analysis/tailoring functions when they call the API.
current_model = DEFAULT_MODEL
# Function to set OpenAI API key
def set_openai_api_key(api_key=None):
    """
    Configure the API key used by the openai module.

    An explicitly supplied key takes precedence; otherwise the
    OPENAI_API_KEY environment variable is consulted.

    Args:
        api_key: Optional API key. When falsy, the environment variable is used.

    Returns:
        bool: True when a key was assigned, False when none was available.
    """
    chosen_key = api_key or os.getenv("OPENAI_API_KEY")
    if not chosen_key:
        # Neither an argument nor an environment value: leave openai.api_key as is.
        return False
    openai.api_key = chosen_key
    return True
# Function to set OpenAI model
def set_openai_model(model_name="gpt-4o"):
    """
    Select the OpenAI model used by subsequent API calls.

    Stores the name in the module-level ``current_model`` variable.

    Args:
        model_name: Model identifier (e.g., "gpt-4o", "gpt-4o-mini").

    Returns:
        str: The model name that is now active.
    """
    global current_model
    current_model = model_name
    return model_name
# Apply the API key from the environment at import time. The boolean result is
# ignored here, so a missing key is not reported at this point.
set_openai_api_key()
def extract_text_from_document(file_path):
    """
    Extract text from a document file (DOCX or PDF).

    The format is chosen from the file extension (case-insensitive). Any
    failure -- unsupported extension, unreadable file, parser error -- is
    printed and reported as None instead of being raised.

    Args:
        file_path: Path to the document file

    Returns:
        str: Extracted text from the document, or None if extraction failed
    """
    try:
        lowered_path = file_path.lower()
        if lowered_path.endswith('.docx'):
            return docx2txt.process(file_path)
        if lowered_path.endswith('.pdf'):
            with open(file_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                # Each page's text is followed by a blank line. A page that
                # yields no text (e.g. a scanned image) contributes only the
                # separator instead of aborting the whole extraction.
                return "".join(
                    (page.extract_text() or "") + "\n\n"
                    for page in pdf_reader.pages
                )
        # Raised inside the try on purpose: callers receive None plus a
        # printed message, exactly like every other failure mode.
        raise ValueError(f"Unsupported file format: {os.path.splitext(file_path)[1]}")
    except Exception as e:
        print(f"Error extracting text from document: {e}")
        return None
def parse_resume(file_path):
    """
    Read a resume file and return its text content.

    Delegates to extract_text_from_document; any exception that escapes it
    is printed and turned into a None result.

    Args:
        file_path: Path to the resume file

    Returns:
        str: Extracted text from the resume, or None on failure
    """
    resume_text = None
    try:
        resume_text = extract_text_from_document(file_path)
    except Exception as e:
        print(f"Error parsing resume: {e}")
    return resume_text
def analyze_resume_job_match(resume_text, job_description, creativity_level=30):
    """
    Ask the configured OpenAI chat model how well a resume fits a job description.

    The system persona is picked from the creativity level (<20 conservative,
    <50 balanced, <80 creative, otherwise optimistic) and the model is asked
    to answer with a JSON object, which is parsed and returned.

    Args:
        resume_text: Raw text of the resume
        job_description: Job description text
        creativity_level: Level of creativity/modification allowed (0-100)

    Returns:
        dict: The parsed JSON analysis. If anything fails, a placeholder dict
        with a 0% match and the error text in "summary" is returned instead.
    """
    try:
        print(f"Analyzing resume match with creativity level: {creativity_level}")
        print(f"Resume text length: {len(resume_text)}")
        print(f"Job description length: {len(job_description)}")
        # (exclusive upper bound, persona) pairs, checked in ascending order;
        # levels of 80 and above keep the optimistic default.
        persona_tiers = (
            (20, "You are a conservative resume analyzer. Focus only on exact matches between the resume and job description. Be strict in your evaluation."),
            (50, "You are a balanced resume analyzer. Evaluate the resume against the job description with a moderate level of flexibility, recognizing transferable skills."),
            (80, "You are a creative resume analyzer. Be generous in your evaluation, recognizing potential and transferable skills even when not explicitly stated."),
        )
        system_message = "You are an optimistic resume analyzer. Focus on potential rather than exact matches. Be very generous in your evaluation and provide ambitious suggestions for improvement."
        for upper_bound, persona in persona_tiers:
            if creativity_level < upper_bound:
                system_message = persona
                break
        prompt = f"""
Analyze the following resume and job description:
RESUME:
{resume_text}
JOB DESCRIPTION:
{job_description}
CREATIVITY LEVEL: {creativity_level}% (where 0% means strictly factual and 100% means highly creative)
Provide a detailed analysis in JSON format with the following structure:
1. "match_percentage": A numerical percentage (0-100) representing how well the resume matches the job description. Use job skills keyword used in job description to match with the contents of the resume to come up with the match percentage.
2. "key_matches": List of skills and experiences in the resume that match the job requirements.
3. "gaps": List of skills or experiences mentioned in the job description that are missing from the resume.
4. "suggestions": Specific suggestions to improve the resume for this job. Ensure the suggestions are based on the job description and the resume and contain the exact keywords from the job description.
5. "summary": A brief summary of the overall match and main recommendations.
6. "skill_breakdown": An object containing categories of skills from the job description and how well the candidate matches each category:
- "technical_skills": Assessment of technical skills match (percentage and comments)
- "experience": Assessment of experience requirements match (percentage and comments)
- "education": Assessment of education requirements match (percentage and comments)
- "soft_skills": Assessment of soft skills/leadership match (percentage and comments)
Adjust your analysis based on the creativity level. Higher creativity means being more generous with matches and more ambitious with suggestions.
IMPORTANT: For each resume, provide a unique and accurate match percentage based on the actual content. Do not use the same percentage for different resumes. The excellent match resume should have a high percentage (80-95%), good match should be moderate-high (65-80%), average match should be moderate (40-65%), and poor match should be low (below 40%).
Return ONLY the JSON object without any additional text.
"""
        chat_messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": prompt}
        ]
        response = openai.chat.completions.create(
            model=current_model,
            messages=chat_messages,
            response_format={"type": "json_object"}
        )
        raw_reply = response.choices[0].message.content
        analysis = json.loads(raw_reply)
        print(f"Analysis complete. Match percentage: {analysis.get('match_percentage', 0)}%")
        return analysis
    except Exception as e:
        print(f"Error analyzing resume: {e}")
        # Placeholder with the same keys the prompt asks the model for, so
        # callers can render it without special-casing failures.
        breakdown_keys = ("technical_skills", "experience", "education", "soft_skills")
        return {
            "match_percentage": 0,
            "key_matches": [],
            "gaps": ["Error analyzing resume"],
            "suggestions": ["Please try again"],
            "summary": f"Error: {str(e)}",
            "skill_breakdown": {
                key: {"percentage": 0, "comments": "Error analyzing resume"}
                for key in breakdown_keys
            }
        }
def tailor_resume(resume_text, job_description, template_path=None, creativity_level=30, verbosity="elaborate"):
    """
    Generate a tailored resume for a job description with the configured OpenAI chat model.

    The system persona is chosen from the creativity level (<20 conservative,
    <50 balanced, <80 creative, otherwise aggressive) and extended with a
    verbosity instruction. When a template is supplied, its non-blank
    paragraph text and detected section headings are added to the prompt.

    Args:
        resume_text: Raw text of the original resume
        job_description: Job description text
        template_path: Optional path to a resume template (.docx)
        creativity_level: Level of creativity/modification allowed (0-100)
        verbosity: Level of detail in the resume; 'concise' selects the brief
            style, any other value selects the elaborate style

    Returns:
        str: Tailored resume content. On any failure the returned string is
        "Error tailoring resume: <details>" rather than an exception.
    """
    try:
        # If a template is provided, read its structure
        template_lines = []
        template_sections = []
        if template_path:
            doc = Document(template_path)
            for para in doc.paragraphs:
                if not para.text.strip():
                    continue
                template_lines.append(para.text + "\n")
                # Heading heuristic: a Heading* style, all-caps text, or a
                # bold first run.
                if para.style.name.startswith('Heading') or para.text.isupper() or (para.runs and para.runs[0].bold):
                    template_sections.append(para.text.strip())
        template_structure = "".join(template_lines)
        # Persona for the requested creativity level. The verbosity
        # instruction below is appended to produce the final system message.
        if creativity_level < 20:
            base_message = "You are a conservative resume editor. Only reorganize existing content to better match the job description. Do not add any new experiences or skills that aren't explicitly mentioned in the original resume."
        elif creativity_level < 50:
            base_message = "You are a balanced resume editor. Enhance existing content with better wording and highlight relevant skills. Make minor improvements but keep all content factual and based on the original resume."
        elif creativity_level < 80:
            base_message = "You are a creative resume editor. Significantly enhance the resume with improved wording and may suggest minor additions or extensions of existing experiences to better match the job description."
        else:
            base_message = "You are an aggressive resume optimizer. Optimize the resume to perfectly match the job description, including suggesting new skills and experiences that would make the candidate more competitive, while maintaining some connection to their actual background. Ensure exact keywords are included from the job description in the new resume."
        if verbosity == "concise":
            system_message = base_message + " Create a concise resume with brief bullet points, focusing only on the most relevant information. Aim for a shorter resume that can be quickly scanned by recruiters."
        else:  # elaborate
            system_message = base_message + " Create a detailed resume that thoroughly explains experiences and skills, providing context and specific achievements. Use comprehensive bullet points to showcase the candidate's qualifications."
        prompt = f"""
Create a tailored version of this resume to better match the job description:
ORIGINAL RESUME:
{resume_text}
JOB DESCRIPTION:
{job_description}
{("TEMPLATE STRUCTURE TO FOLLOW:" + chr(10) + template_structure) if template_path else ""}
{("TEMPLATE SECTIONS TO INCLUDE:" + chr(10) + chr(10).join(template_sections)) if template_sections else ""}
CREATIVITY LEVEL: {creativity_level}% (where 0% means strictly factual and 100% means highly creative)
VERBOSITY: {verbosity.upper()} (CONCISE means brief and to-the-point, ELABORATE means detailed and comprehensive)
Create a tailored resume that:
1. Highlights relevant skills and experiences that match the job description
2. Uses keywords from the job description
3. Quantifies achievements where possible
4. Removes or downplays irrelevant information
5. Adjusts content based on the specified creativity level
IMPORTANT FORMATTING INSTRUCTIONS:
- Format your response with clear section headings in ALL CAPS
- Use bullet points (•) for listing items and achievements
- If a template is provided, follow its exact section structure and organization
- Maintain the same section headings as in the template when possible
- For each section, provide content that matches the requested verbosity level:
* CONCISE: Use 1-2 line bullet points, focus only on the most relevant achievements
* ELABORATE: Use detailed bullet points with context and specific metrics
Return the complete tailored resume content in a professional format.
"""
        response = openai.chat.completions.create(
            model=current_model,
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error tailoring resume: {e}")
        return f"Error tailoring resume: {str(e)}"
def create_word_document(content, output_path, template_path=None):
    """
    Create a Word document with the given content, optionally styled after a template.

    Content is processed line by line (blank lines are dropped):
      * a heading is an all-caps line, or a line shorter than 50 characters
        ending in ':' -- unless it starts with a bullet marker followed by
        whitespace (so "• AWS, SQL" stays a bullet);
      * a line starting with '•', '-' or '*' becomes a 'List Bullet' paragraph
        with the marker removed;
      * anything else becomes a plain paragraph.

    When template_path names an existing file, a new document is built (the
    template file is only read): style names and the page geometry of each
    template section are copied, headers/footers are copied via
    copy_template_complex_elements, and each heading takes the style of the
    first template heading whose text contains it or is contained in it
    (case-insensitive), defaulting to 'Heading 1'.

    Args:
        content: Text content for the document
        output_path: Path to save the document
        template_path: Optional path to a template document to use as a base

    Returns:
        bool: True on success, False if any step failed (the error is printed)
    """
    bullet_markers = ('•', '-', '*')

    def is_heading(text):
        # An all-caps bullet such as "• AWS, SQL" satisfies isupper() but is
        # list content, not a section heading.
        if text.startswith(bullet_markers) and text[1:2].isspace():
            return False
        return text.isupper() or (text.endswith(':') and len(text) < 50)

    def add_body_line(document, text):
        if text.startswith(bullet_markers):
            document.add_paragraph(text[1:].strip(), style='List Bullet')
        else:
            document.add_paragraph(text)

    try:
        stripped_lines = (raw_line.strip() for raw_line in content.split('\n'))
        lines = [line for line in stripped_lines if line]

        if template_path and os.path.exists(template_path):
            # Build a new document instead of modifying the template directly
            doc = Document()
            original_template = Document(template_path)

            # Register the template's style names in the new document
            for style in original_template.styles:
                if style.name not in doc.styles:
                    try:
                        doc.styles.add_style(style.name, style.type)
                    except Exception:
                        # Best effort: the style may be built-in or of a kind
                        # that cannot be added; the document is still usable.
                        pass

            # Mirror page geometry: reuse the new document's first section,
            # then append one section per additional template section.
            geometry_attrs = (
                'page_height', 'page_width',
                'left_margin', 'right_margin',
                'top_margin', 'bottom_margin',
                'header_distance', 'footer_distance',
            )
            for index, template_section in enumerate(original_template.sections):
                new_section = doc.sections[0] if index == 0 else doc.add_section()
                for attr in geometry_attrs:
                    setattr(new_section, attr, getattr(template_section, attr))

            # Copy complex elements like headers and footers
            copy_template_complex_elements(original_template, doc)

            # Group lines into sections, each starting at a heading. Lines
            # before the first heading form their own leading section.
            sections = []
            current_section = []
            for line in lines:
                if is_heading(line):
                    if current_section:
                        sections.append(current_section)
                    current_section = [line]
                else:
                    current_section.append(line)
            if current_section:
                sections.append(current_section)

            # Template heading text -> style name, in document order. Blank
            # paragraphs are skipped: an empty string is a substring of every
            # heading and would otherwise match all of them.
            template_heading_styles = {}
            for para in original_template.paragraphs:
                heading_text = para.text.strip()
                if not heading_text:
                    continue
                if para.style.name.startswith('Heading') or para.text.isupper() or (para.runs and para.runs[0].bold):
                    template_heading_styles[heading_text] = para.style.name

            for section in sections:
                # First line of each section is treated as a heading
                heading = section[0]
                heading_upper = heading.upper()
                heading_style = 'Heading 1'  # Default
                for template_heading, style_name in template_heading_styles.items():
                    template_upper = template_heading.upper()
                    if template_upper in heading_upper or heading_upper in template_upper:
                        heading_style = style_name
                        break
                p = doc.add_paragraph()
                try:
                    p.style = heading_style
                except Exception:
                    p.style = 'Heading 1'  # Fallback when the style is unusable
                p.add_run(heading).bold = True
                for line in section[1:]:
                    add_body_line(doc, line)
        else:
            # No usable template: plain document with basic formatting
            doc = Document()
            for line in lines:
                if is_heading(line):
                    p = doc.add_paragraph()
                    p.style = 'Heading 1'
                    p.add_run(line).bold = True
                else:
                    add_body_line(doc, line)

        doc.save(output_path)
        return True
    except Exception as e:
        print(f"Error creating Word document: {e}")
        return False
def get_available_templates():
    """
    List resume templates found in the current working directory.

    A template is any entry whose name ends with "_Template.docx" or
    "Template.docx".

    Returns:
        list: Matching file names, in the order os.listdir reports them
    """
    template_suffixes = ("_Template.docx", "Template.docx")
    templates = [
        entry for entry in os.listdir(".")
        if entry.endswith(template_suffixes)
    ]
    print(f"Found templates in current directory: {templates}")
    return templates
def copy_templates_to_current_directory():
    """
    Copy .docx files from the "templates" directory into the current working
    directory, skipping any whose name already exists there.

    Returns:
        list: File names that were copied by this call
    """
    import shutil

    templates_dir = "templates"
    copied_templates = []

    if not os.path.exists(templates_dir):
        print(f"Templates directory '{templates_dir}' not found.")
        return copied_templates

    for template_file in os.listdir(templates_dir):
        if not template_file.endswith(".docx"):
            continue
        # The destination keeps the same file name, relative to the cwd.
        dest_path = template_file
        if os.path.exists(dest_path):
            print(f"Template '{template_file}' already exists in current directory.")
            continue
        try:
            shutil.copy2(os.path.join(templates_dir, template_file), dest_path)
            copied_templates.append(dest_path)
            print(f"Copied template '{template_file}' to current directory.")
        except Exception as e:
            # One failed copy is reported and does not stop the rest.
            print(f"Error copying template '{template_file}': {e}")

    return copied_templates
def copy_template_complex_elements(source_doc, target_doc):
    """
    Copy header and footer paragraphs (text and paragraph style) from the
    source document to the target document, section by section.

    Sections are paired by position; extra sections on either side are
    ignored. A header/footer is copied only when the SOURCE section defines
    its own (is not linked to the previous section). The matching target
    header/footer is unlinked first so it has its own definition to write
    into -- a header/footer that is still linked has no content of its own.

    Errors are printed and swallowed: header/footer copying is best effort
    and must not prevent the document from being produced.

    Args:
        source_doc: Source Document object
        target_doc: Target Document object
    """
    try:
        for target_section, source_section in zip(target_doc.sections, source_doc.sections):
            for part_name in ('header', 'footer'):
                source_part = getattr(source_section, part_name)
                if source_part.is_linked_to_previous:
                    # The source merely inherits from the previous section;
                    # leave the target inheriting as well.
                    continue
                target_part = getattr(target_section, part_name)
                if target_part.is_linked_to_previous:
                    target_part.is_linked_to_previous = False
                # Make sure there is at least one paragraph to write into
                if len(target_part.paragraphs) == 0:
                    target_part.add_paragraph()
                existing_paragraphs = target_part.paragraphs
                for index, source_para in enumerate(source_part.paragraphs):
                    if index < len(existing_paragraphs):
                        target_para = existing_paragraphs[index]
                        target_para.text = source_para.text
                    else:
                        target_para = target_part.add_paragraph(source_para.text)
                    try:
                        target_para.style = source_para.style
                    except Exception:
                        pass  # Style might not be compatible with the target
    except Exception as e:
        print(f"Error copying complex elements: {e}")
def create_tailored_resume_from_template(content, template_path, output_path):
    """
    Create a tailored resume document styled after a template.

    The template file is only read; a NEW document is built and saved. What is
    carried over from the template: style names, the page geometry of each
    section, headers/footers (via copy_template_complex_elements), and -- for
    content headings that match a template heading -- that heading's paragraph
    style and first non-blank run's character formatting. Template body
    content (including tables) is not copied.

    Content is parsed line by line into sections. A heading is an all-caps
    line, or a line shorter than 50 characters ending in ':', unless it starts
    with a bullet marker followed by whitespace. Lines before the first
    heading, and headings with no following content, are dropped. If a heading
    repeats, the later section replaces the earlier one.

    Args:
        content: Structured content for the resume (text)
        template_path: Path to the template document
        output_path: Path to save the output document

    Returns:
        bool: True on success; False if the template does not exist or any
        step failed (the error is printed)
    """
    bullet_markers = ('•', '-', '*')

    def is_heading(text):
        # An all-caps bullet such as "• AWS, SQL" satisfies isupper() but is
        # list content, not a section heading.
        if text.startswith(bullet_markers) and text[1:2].isspace():
            return False
        return text.isupper() or (text.endswith(':') and len(text) < 50)

    try:
        if not os.path.exists(template_path):
            return False
        doc = Document(template_path)

        # Parse the content into {heading: [lines]}
        sections = {}
        current_section = None
        current_content = []
        for raw_line in content.split('\n'):
            line = raw_line.strip()
            if not line:
                continue
            if is_heading(line):
                if current_section and current_content:
                    sections[current_section] = current_content
                current_section = line
                current_content = []
            elif current_section:
                current_content.append(line)
        if current_section and current_content:
            sections[current_section] = current_content

        # Template heading text -> paragraph index (blank paragraphs skipped)
        template_sections = {}
        for i, para in enumerate(doc.paragraphs):
            heading_text = para.text.strip()
            if not heading_text:
                continue
            if para.style.name.startswith('Heading') or para.text.isupper() or (para.runs and para.runs[0].bold):
                template_sections[heading_text] = i

        # Build a new document so template body content is not duplicated
        new_doc = Document()
        for style in doc.styles:
            if style.name not in new_doc.styles:
                try:
                    new_doc.styles.add_style(style.name, style.type)
                except Exception:
                    # Best effort: the style may be built-in or not addable.
                    pass

        # Mirror page geometry: reuse the new document's first section, then
        # append one section per additional template section.
        geometry_attrs = (
            'page_height', 'page_width',
            'left_margin', 'right_margin',
            'top_margin', 'bottom_margin',
            'header_distance', 'footer_distance',
        )
        for index, template_section in enumerate(doc.sections):
            new_section = new_doc.sections[0] if index == 0 else new_doc.add_section()
            for attr in geometry_attrs:
                setattr(new_section, attr, getattr(template_section, attr))

        # Copy complex elements like headers and footers
        copy_template_complex_elements(doc, new_doc)

        # Map each content heading to its best-matching template heading
        section_mapping = {}
        for our_section in sections:
            ours_upper = our_section.upper()
            best_match = None
            best_score = 0
            for template_section in template_sections:
                theirs_upper = template_section.upper()
                if theirs_upper == ours_upper:
                    # An exact match is the best possible score. Without
                    # recording it, the threshold check below would reject it.
                    best_match = template_section
                    best_score = 1.0
                    break
                if theirs_upper in ours_upper or ours_upper in theirs_upper:
                    # Partial match: shared distinct characters relative to
                    # the longer heading.
                    score = len(set(theirs_upper) & set(ours_upper)) / max(len(template_section), len(our_section))
                    if score > best_score:
                        best_score = score
                        best_match = template_section
            if best_match and best_score > 0.5:
                section_mapping[our_section] = best_match

        # Write headings and their content
        for our_section, content_lines in sections.items():
            p = new_doc.add_paragraph()
            if our_section in section_mapping:
                template_para = doc.paragraphs[template_sections[section_mapping[our_section]]]
                try:
                    p.style = template_para.style.name
                except Exception:
                    p.style = 'Heading 1'  # Fallback when the style is unusable
                # Reuse the formatting of the first template run with text
                for run in template_para.runs:
                    if run.text.strip():
                        p_run = p.add_run(our_section)
                        p_run.bold = run.bold
                        p_run.italic = run.italic
                        p_run.underline = run.underline
                        if run.font.name:
                            p_run.font.name = run.font.name
                        if run.font.size:
                            p_run.font.size = run.font.size
                        if run.font.color.rgb:
                            p_run.font.color.rgb = run.font.color.rgb
                        break
                else:
                    # No runs with text: add a default bold run
                    p_run = p.add_run(our_section)
                    p_run.bold = True
            else:
                # No matching template section, use default formatting
                p.style = 'Heading 1'
                p_run = p.add_run(our_section)
                p_run.bold = True

            for line in content_lines:
                if line.startswith(bullet_markers):
                    new_doc.add_paragraph(line[1:].strip(), style='List Bullet')
                else:
                    new_doc.add_paragraph(line)

        new_doc.save(output_path)
        return True
    except Exception as e:
        print(f"Error creating tailored resume from template: {e}")
        return False
def convert_text_to_word(text_file_path, output_docx_path):
    """
    Convert a UTF-8 text file to a Word document.

    The text is split into paragraphs on blank lines ("\\n\\n"); empty
    paragraphs are skipped. The first non-empty paragraph gets the 'Title'
    style, paragraphs that are all caps or end with ':' get 'Heading 2', and
    everything else gets 'Normal'.

    Args:
        text_file_path: Path to the text file
        output_docx_path: Path where the Word document will be saved

    Returns:
        bool: True if successful, False otherwise (the error is printed)
    """
    try:
        with open(text_file_path, 'r', encoding='utf-8') as file:
            content = file.read()

        doc = Document()
        # Track the first paragraph actually written rather than the split
        # index, so leading blank lines do not prevent a title being styled.
        title_pending = True
        for chunk in content.split('\n\n'):
            para_text = chunk.strip()
            if not para_text:
                continue
            paragraph = doc.add_paragraph(para_text)
            if title_pending:  # First paragraph is likely the name
                paragraph.style = 'Title'
                title_pending = False
            elif para_text.isupper() or para_text.endswith(':'):
                paragraph.style = 'Heading 2'
            else:
                paragraph.style = 'Normal'

        doc.save(output_docx_path)
        return True
    except Exception as e:
        print(f"Error converting text to Word: {e}")
        return False
def convert_sample_resumes():
    """
    Convert the bundled sample text resumes to Word documents.

    Looks for four fixed "*.docx.txt" files in the current working directory
    and converts each one that exists to a sibling "*.docx" file.

    Returns:
        list: Paths to the Word documents that were created
    """
    sample_files = (
        "excellent_match_resume.docx.txt",
        "good_match_resume.docx.txt",
        "average_match_resume.docx.txt",
        "poor_match_resume.docx.txt",
    )
    created_files = []
    for text_file in sample_files:
        if not os.path.exists(text_file):
            print(f"Sample file not found: {text_file}")
            continue
        output_path = text_file.replace('.docx.txt', '.docx')
        print(f"Converting {text_file} to {output_path}...")
        converted = convert_text_to_word(text_file, output_path)
        if converted:
            created_files.append(output_path)
            print(f"Successfully created {output_path}")
        else:
            print(f"Failed to create {output_path}")
    return created_files