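"""ATS Resume Optimizer (Gradio app).

Extracts keywords from a pasted job description and an uploaded .docx resume,
reports keyword coverage, and can append user-selected missing keywords to the
resume as white 1 pt text.
"""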
import gradio as gr
from docx import Document
from docx.shared import Pt, RGBColor
import tempfile
import re
import logging
import os
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Lowercased stop phrases excluded from keyword extraction.
STOP_PHRASES_LOWER = {
    # (add your full STOP_PHRASES_LOWER set here)
}
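# Illustrative entries only (assumption -- substitute your real set), e.g.:
#   {"team player", "we are", "you will", "as well as"}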
def extract_keywords_from_text(text):
    if not text:
        return []
    extracted_terms = set()
    # Single technical tokens: acronyms (AWS, CI/CD), dotted names (Node.js,
    # ASP.NET), "C++"-style names, capitalized words, and CamelCase identifiers.
    tech_keyword_regex = r"""
        \b(?:
            [A-Z]{2,}[A-Z0-9./+#-]*[A-Z0-9]? |
            [A-Za-z][A-Za-z0-9]*\.(?:NET|js|io|ai|com|org) |
            [A-Za-z][A-Za-z0-9]*\+\+ |
            [A-Z][A-Za-z0-9#+-]+ |
            [A-Z][a-z]+[A-Z][A-Za-z0-9]+
        )\b
    """
    for match in re.finditer(tech_keyword_regex, text, re.VERBOSE):
        term = match.group(0).strip()
        if term:
            extracted_terms.add(term)
    # Capitalized multi-word phrases of 2-5 main words, optionally joined by a
    # short connector ("of", "and", "for", ...).
    main_word_pattern = r"(?:[A-Z][A-Za-z0-9'-]*|[A-Z]{2,}[A-Z0-9'-]*)"
    connector_pattern = r"(?:\s+(?:of|and|for|in|the|to|with|&|on|as|at|by|per)\s+)?"
    for num_main_words in range(2, 6):
        current_phrase_regex_str = (
            r"\b" + main_word_pattern
            + (connector_pattern + main_word_pattern) * (num_main_words - 1)
            + r"\b"
        )
        try:
            for match in re.finditer(current_phrase_regex_str, text):
                term = re.sub(r'\s+', ' ', match.group(0).strip())
                if term:
                    extracted_terms.add(term)
        except re.error as e:
            logger.warning(f"Regex error for multi-word phrases (N={num_main_words}): {e}")
            continue
    # Drop over-long terms, stop phrases, and terms without any alphanumerics.
    final_cleaned_terms = set()
    for term in extracted_terms:
        term_normalized = term.strip()
        if not term_normalized or len(term_normalized) > 75:
            continue
        if term_normalized.lower() in STOP_PHRASES_LOWER:
            continue
        if not re.search(r'[A-Za-z0-9]', term_normalized):
            continue
        final_cleaned_terms.add(term_normalized)
    # Longest terms first, then case-insensitive alphabetical.
    return sorted(final_cleaned_terms, key=lambda x: (-len(x), x.lower()))
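# Illustrative call (hypothetical input):
#   extract_keywords_from_text("Built CI/CD pipelines with Python and AWS Lambda")
# would yield terms such as "AWS Lambda", "CI/CD", and "Python" -- plus other
# capitalized tokens unless they appear in STOP_PHRASES_LOWER -- longest first.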
def extract_ats_keywords(job_description_text):
    logger.info("Extracting keywords from Job Description...")
    return extract_keywords_from_text(job_description_text)

def extract_resume_keywords(resume_text):
    logger.info("Extracting keywords from Resume...")
    return extract_keywords_from_text(resume_text)
def analyze_resume_for_suggestions(docx_file_obj, job_description_text):
    if docx_file_obj is None or not job_description_text.strip():
        return ("⚠️ Error: Please upload your resume and paste the job description.",
                gr.update(choices=[], value=[], visible=False),
                gr.update(visible=False), gr.update(visible=False), None)
    temp_resume_path = docx_file_obj.name
    resume_doc = Document(temp_resume_path)
    resume_text = "\n".join(p.text for p in resume_doc.paragraphs)
    ats_keywords = extract_ats_keywords(job_description_text)
    resume_keywords = extract_resume_keywords(resume_text)
    # Compare case-insensitively, but report keywords in their original casing.
    ats_keywords_lower_set = {kw.lower() for kw in ats_keywords}
    resume_keywords_lower_set = {kw.lower() for kw in resume_keywords}
    matching_keywords = [kw for kw in ats_keywords if kw.lower() in resume_keywords_lower_set]
    missing_keywords = [kw for kw in ats_keywords if kw.lower() not in resume_keywords_lower_set]
    coverage = len(matching_keywords) / len(ats_keywords_lower_set) * 100 if ats_keywords_lower_set else 0
    results_str = "📊 **Resume Scan Results** 📊\n\n"
    results_str += f"📋 **Job Description Keywords:**\n_{', '.join(ats_keywords)}_\n\n"
    results_str += f"📄 **Keywords Found in Your Resume:**\n_{', '.join(resume_keywords)}_\n\n"
    results_str += f"✅ **Matching Keywords:**\n_{', '.join(matching_keywords)}_\n\n"
    results_str += f"🎯 **Keyword Coverage: {coverage:.2f}%**\n\n"
    TARGET_COVERAGE = 75.0
    if coverage >= TARGET_COVERAGE:
        results_str += f"🎉 **Great! Your resume meets {TARGET_COVERAGE}% coverage!** 🎉"
        return (results_str, gr.update(choices=[], value=[], visible=False),
                gr.update(visible=False), gr.update(visible=False), temp_resume_path)
    # How many more matches are needed to reach the target coverage.
    needed = max(0, int(len(ats_keywords_lower_set) * (TARGET_COVERAGE / 100.0)) - len(matching_keywords))
    results_str += f"💡 **Suggested Keywords to Add (select below):** Add approx. **{needed}** more keywords."
    return (results_str, gr.update(choices=missing_keywords, value=[], visible=True),
            gr.update(visible=True), gr.update(visible=True), temp_resume_path)
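# Note: the five values returned above map 1:1 onto the `outputs` wired to
# analyze_button.click() below: the results markdown, the checkbox choices,
# the selection group's visibility, the add button's visibility, and the
# resume-path state.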
def add_selected_keywords_to_docx(selected_keywords, resume_path_from_state):
    yield ("⏳ Processing... Adding selected keywords to your resume.",
           gr.update(value=None, visible=False, interactive=False))
    if not selected_keywords:
        yield ("ℹ️ No keywords selected; nothing to do.",
               gr.update(value=None, visible=False, interactive=False))
        return
    if not resume_path_from_state or not os.path.exists(resume_path_from_state):
        yield ("⚠️ Error: Original resume path not found. Please re-analyze.",
               gr.update(value=None, visible=False, interactive=False))
        return
    resume_doc = Document(resume_path_from_state)
    keywords_string = " ".join(selected_keywords)
    # Append the selected keywords as near-invisible text: white, 1 pt font.
    p = resume_doc.add_paragraph()
    run = p.add_run(keywords_string)
    run.font.size = Pt(1)
    run.font.color.rgb = RGBColor(255, 255, 255)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp_file:
        modified_resume_path = tmp_file.name
    resume_doc.save(modified_resume_path)
    yield (f"✅ Resume updated: {len(selected_keywords)} keyword(s) appended as white 1 pt text.",
           gr.update(value=modified_resume_path, visible=True, interactive=True))
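# Because this handler is a generator, Gradio streams each `yield` as an
# interim UI update: the "Processing..." message appears first, and the final
# status plus the downloadable file replace it when the work completes.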
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎯 ATS Resume Optimizer")
    gr.Markdown("Upload a .docx resume, paste the job description, analyze, then select keywords to add.")
    resume_path_state = gr.State(None)
    with gr.Row():
        with gr.Column(scale=1):
            docx_file_input = gr.File(label="📄 Upload Resume", file_types=[".docx"])
            job_desc_input = gr.Textbox(lines=12, label="📋 Paste Job Description")
            analyze_button = gr.Button("🔍 Analyze", variant="primary")
        with gr.Column(scale=2):
            analysis_results_markdown = gr.Markdown()
            with gr.Group(visible=False) as keyword_selection_group:
                missing_keywords_checkboxes = gr.CheckboxGroup(label="💡 Select Keywords", choices=[])
            add_selected_button = gr.Button("✍️ Add Keywords", variant="secondary", visible=False)
            modification_status_markdown = gr.Markdown()
            download_optimized_resume_button = gr.File(label="⬇️ Download Resume", visible=False, interactive=False)
    analyze_button.click(
        fn=analyze_resume_for_suggestions,
        inputs=[docx_file_input, job_desc_input],
        outputs=[analysis_results_markdown, missing_keywords_checkboxes,
                 keyword_selection_group, add_selected_button, resume_path_state],
    )
    add_selected_button.click(
        fn=add_selected_keywords_to_docx,
        inputs=[missing_keywords_checkboxes, resume_path_state],
        outputs=[modification_status_markdown, download_optimized_resume_button],
    )
    def clear_on_new_input():
        # Reset every output and the stored resume path when either input changes.
        return ("", gr.update(choices=[], value=[], visible=False),
                gr.update(visible=False), gr.update(visible=False), "",
                gr.update(visible=False, value=None, interactive=False), None)

    docx_file_input.change(fn=clear_on_new_input, inputs=None, outputs=[
        analysis_results_markdown, missing_keywords_checkboxes, keyword_selection_group,
        add_selected_button, modification_status_markdown, download_optimized_resume_button,
        resume_path_state,
    ])
    job_desc_input.change(fn=clear_on_new_input, inputs=None, outputs=[
        analysis_results_markdown, missing_keywords_checkboxes, keyword_selection_group,
        add_selected_button, modification_status_markdown, download_optimized_resume_button,
        resume_path_state,
    ])
demo.launch()
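# Launch note: launch() with no arguments serves the app locally; passing
# share=True (optional) would additionally create a temporary public link.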