import gradio as gr
from docx import Document
from docx.shared import Pt, RGBColor
import tempfile
import re
import logging
import os

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Generic words and phrases to exclude from the extracted keywords; any term
# whose lowercase form appears in this set is dropped during cleanup.
STOP_PHRASES_LOWER = {
    # (add your full STOP_PHRASES_LOWER set here)
}

def extract_keywords_from_text(text):
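    """Extract candidate ATS keywords from free text.

    Collects acronyms, dotted tech names (e.g. Node.js), "++" languages,
    CamelCase terms, and capitalized multi-word phrases, then filters out
    stop phrases and returns the terms sorted longest-first.
    """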
    if not text:
        return []
    extracted_terms = set()
    # Verbose regex for single tech-style tokens; each alternative targets a
    # common keyword shape (examples in the comments).
    tech_keyword_regex = r"""
        \b(?:
            [A-Z]{2,}[A-Z0-9./+#-]*[A-Z0-9]?               |   # acronyms / all-caps terms, e.g. SQL, CI/CD
            [A-Za-z][A-Za-z0-9]*\.(?:NET|js|io|ai|com|org) |   # dotted names, e.g. ASP.NET, Node.js
            [A-Za-z][A-Za-z0-9]*\+\+                       |   # "++" languages, e.g. C++
            [A-Z][A-Za-z0-9#+-]+                           |   # capitalized terms with tech symbols, e.g. C#, Python
            [A-Z][a-z]+[A-Z][A-Za-z0-9]+                       # CamelCase terms, e.g. JavaScript
        )\b
    """
    for match in re.finditer(tech_keyword_regex, text, re.VERBOSE):
        term = match.group(0).strip()
        if term:
            extracted_terms.add(term)

    # Capture capitalized multi-word phrases of 2-5 main words, optionally
    # joined by common connector words (e.g. "Machine Learning", "Master of Science").
    main_word_pattern = r"(?:[A-Z][A-Za-z0-9'-]*|[A-Z]{2,}[A-Z0-9'-]*)"
    connector_pattern = r"(?:\s+(?:of|and|for|in|the|to|with|&|on|as|at|by|per)\s+)?"
    for num_main_words in range(2, 6):
        current_phrase_regex_str = r"\b" + main_word_pattern + \
                                   (connector_pattern + main_word_pattern) * (num_main_words - 1) + \
                                   r"\b"
        try:
            for match in re.finditer(current_phrase_regex_str, text):
                term = match.group(0).strip()
                term = re.sub(r'\s+', ' ', term)
                if term:
                    extracted_terms.add(term)
        except re.error as e:
            logger.warning(f"Regex error for multi-word phrases (N={num_main_words}): {e}.")
            continue

    final_cleaned_terms = set()
    for term in extracted_terms:
        term_normalized = term.strip()
        if not term_normalized or len(term_normalized) < 1 or len(term_normalized) > 75:
            continue
        if term_normalized.lower() in STOP_PHRASES_LOWER:
            continue
        if not re.search(r'[A-Za-z0-9]', term_normalized):
            continue
        final_cleaned_terms.add(term_normalized)

    return sorted(final_cleaned_terms, key=lambda x: (-len(x), x.lower()))

def extract_ats_keywords(job_description_text):
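    """Extract ATS-style keywords from the job description text."""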
    logger.info("Extracting keywords from Job Description...")
    return extract_keywords_from_text(job_description_text)

def extract_resume_keywords(resume_text):
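    """Extract keywords from the resume's plain text."""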
    logger.info("Extracting keywords from Resume...")
    return extract_keywords_from_text(resume_text)

def analyze_resume_for_suggestions(docx_file_obj, job_description_text):
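    """Compare resume keywords against job-description keywords.

    Returns a results summary plus Gradio component updates that reveal the
    missing-keyword checkboxes when coverage is below the target, and the
    path of the uploaded resume for later modification.
    """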
    if docx_file_obj is None or not job_description_text.strip():
        return "⚠️ Error: Please upload your resume and paste the job description.", gr.update(choices=[], value=[], visible=False), gr.update(visible=False), gr.update(visible=False), None

    temp_resume_path = docx_file_obj.name
    resume_doc = Document(temp_resume_path)
    resume_text = "\n".join([p.text for p in resume_doc.paragraphs])
    ats_keywords = extract_ats_keywords(job_description_text)
    resume_keywords = extract_resume_keywords(resume_text)

    ats_keywords_lower_set = {kw.lower() for kw in ats_keywords}
    resume_keywords_lower_set = {kw.lower() for kw in resume_keywords}

    matching_keywords = [kw for kw in ats_keywords if kw.lower() in resume_keywords_lower_set]
    missing_keywords = [kw for kw in ats_keywords if kw.lower() not in resume_keywords_lower_set]

    # Compute coverage case-insensitively so different casings of the same
    # keyword are not double-counted.
    matched_lower_set = {kw.lower() for kw in matching_keywords}
    coverage = len(matched_lower_set) / len(ats_keywords_lower_set) * 100 if ats_keywords_lower_set else 0

    results_str = f"πŸ“Š **Resume Scan Results** πŸ“Š\n\n"
    results_str += f"πŸ”‘ **Job Description Keywords:**\n_{', '.join(ats_keywords)}_\n\n"
    results_str += f"πŸ“„ **Keywords Found in Your Resume:**\n_{', '.join(resume_keywords)}_\n\n"
    results_str += f"βœ… **Matching Keywords:**\n_{', '.join(matching_keywords)}_\n\n"
    results_str += f"🎯 **Keyword Coverage: {coverage:.2f}%**\n\n"

    TARGET_COVERAGE = 75.0
    if coverage >= TARGET_COVERAGE:
        results_str += f"πŸŽ‰ **Great! Your resume meets {TARGET_COVERAGE}% coverage!** πŸŽ‰"
        return results_str, gr.update(choices=[], value=[], visible=False), gr.update(visible=False), gr.update(visible=False), temp_resume_path

    needed = max(0, int(len(ats_keywords_lower_set) * (TARGET_COVERAGE / 100.0)) - len(matched_lower_set))
    results_str += f"💡 **Suggested keywords to add (select below):** adding roughly **{needed}** more keyword(s) should reach the target."

    return results_str, gr.update(choices=missing_keywords, value=[], visible=True), gr.update(visible=True), gr.update(visible=True), temp_resume_path

def add_selected_keywords_to_docx(selected_keywords, resume_path_from_state):
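    """Append the selected keywords to the resume as 1 pt white text.

    Yields a status message first, then the download path of the modified
    .docx; written as a generator so Gradio can show progress.
    """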
    yield "⏳ Processing... Adding selected keywords to your resume.", gr.update(value=None, visible=False, interactive=False)

    if not selected_keywords:
        yield "ℹ️ No keywords selected; nothing to do.", gr.update(value=None, visible=False, interactive=False)
        return

    if not resume_path_from_state or not os.path.exists(resume_path_from_state):
        yield "⚠️ Error: Original resume path not found. Please re‑analyze.", gr.update(value=None, visible=False, interactive=False)
        return

    resume_doc = Document(resume_path_from_state)
    keywords_string = " ".join(selected_keywords)
    p = resume_doc.add_paragraph()
    run = p.add_run(keywords_string)

    # Format the appended run as 1 pt white text so ATS parsers can read the
    # keywords while they remain effectively invisible to human readers.
    run.font.size = Pt(1)
    run.font.color.rgb = RGBColor(255, 255, 255)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as tmp_file:
        modified_resume_path = tmp_file.name
    resume_doc.save(modified_resume_path)

    yield f"βœ… Resume updated: {len(selected_keywords)} keyword(s) appended as white 1β€―pt text.", gr.update(value=modified_resume_path, visible=True, interactive=True)

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# πŸš€ ATS Resume Optimizer")
    gr.Markdown("Upload .docx, paste JD, analyze, select keywords to add.")

    # Holds the path of the uploaded resume between the analyze and add-keywords steps.
    resume_path_state = gr.State(None)

    with gr.Row():
        with gr.Column(scale=1):
            docx_file_input = gr.File(label="📄 Upload Resume", file_types=[".docx"])
            job_desc_input = gr.Textbox(lines=12, label="📝 Paste Job Description")
            analyze_button = gr.Button("🔍 Analyze", variant="primary")
        with gr.Column(scale=2):
            analysis_results_markdown = gr.Markdown()
            with gr.Group(visible=False) as keyword_selection_group:
                missing_keywords_checkboxes = gr.CheckboxGroup(label="💡 Select Keywords", choices=[])
                add_selected_button = gr.Button("✍️ Add Keywords", variant="secondary", visible=False)
            modification_status_markdown = gr.Markdown()
            download_optimized_resume_button = gr.File(label="⬇️ Download Resume", visible=False, interactive=False)

    analyze_button.click(
        fn=analyze_resume_for_suggestions,
        inputs=[docx_file_input, job_desc_input],
        outputs=[analysis_results_markdown, missing_keywords_checkboxes, keyword_selection_group, add_selected_button, resume_path_state]
    )
    add_selected_button.click(
        fn=add_selected_keywords_to_docx,
        inputs=[missing_keywords_checkboxes, resume_path_state],
        outputs=[modification_status_markdown, download_optimized_resume_button]
    )

    def clear_on_new_input():
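        """Reset all result components and stored state when either input changes."""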
        return "", gr.update(choices=[], value=[], visible=False), gr.update(visible=False), gr.update(visible=False), "", gr.update(visible=False, value=None, interactive=False), None

    docx_file_input.change(fn=clear_on_new_input, inputs=None, outputs=[
        analysis_results_markdown, missing_keywords_checkboxes, keyword_selection_group,
        add_selected_button, modification_status_markdown, download_optimized_resume_button,
        resume_path_state
    ])
    job_desc_input.change(fn=clear_on_new_input, inputs=None, outputs=[
        analysis_results_markdown, missing_keywords_checkboxes, keyword_selection_group,
        add_selected_button, modification_status_markdown, download_optimized_resume_button,
        resume_path_state
    ])

demo.launch()