Spaces:
Sleeping
Sleeping
import gradio as gr | |
import requests | |
import json | |
import os | |
import time | |
from typing import List, Dict, Any | |
from dotenv import load_dotenv | |
import concurrent.futures | |
import git | |
# Locate the git checkout that contains this file (searching parent
# directories) and record the hash of the commit currently checked out.
repo = git.Repo(search_parent_directories=True)
sha = repo.head.object.hexsha

# JavaScript snippet: if the page URL does not already carry
# ``__theme=light``, set that query parameter and reload the page.
js_func = """
function refresh() {
    const url = new URL(window.location);
    if (url.searchParams.get('__theme') !== 'light') {
        url.searchParams.set('__theme', 'light');
        window.location.href = url.href;
    }
}
"""

# Load environment variables
load_dotenv(".env.local")

# Load endpoints from JSON file. The rest of the module uses ENDPOINTS as a
# mapping of endpoint name -> URL (e.g. ENDPOINTS["job"]). JSON is UTF-8 by
# specification, so decode it explicitly rather than via the locale default.
with open('endpoints.json', 'r', encoding='utf-8') as f:
    ENDPOINTS = json.load(f)

# Get HuggingFace API token from environment variable; it is sent as a Bearer
# token with every endpoint request, so warn early when it is missing.
HF_TOKEN = os.getenv('HUGGINGFACE_TOKEN')
if not HF_TOKEN:
    print("Warning: HUGGINGFACE_TOKEN environment variable not set")
# API calling function with retry logic | |
def call_api(endpoint_url: str, payload: Dict[str, Any], max_retries: int = 5, retry_delay: int = 2) -> Dict:
    """Call API endpoint with retry logic.

    POSTs ``payload`` as JSON to ``endpoint_url`` with the module's bearer
    token and retries on HTTP 503, timeouts and any other exception.

    Args:
        endpoint_url: URL to POST to.
        payload: JSON-serialisable request body.
        max_retries: Maximum number of attempts.
        retry_delay: Base delay in seconds between attempts. After a 503 the
            delay grows linearly (``retry_delay * attempt_number``); after an
            exception it is constant.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``{}`` (non-retryable
        status code, or every attempt failed).
        NOTE(review): callers in this module also iterate the result as a
        list, so the decoded body is not necessarily a dict.
    """
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    final_attempt = max_retries - 1

    for attempt in range(max_retries):
        try:
            response = requests.post(
                endpoint_url,
                json=payload,
                headers=headers,
                timeout=30
            )
            if response.status_code == 200:
                return response.json()
            if response.status_code != 503:
                # Any status other than 200/503 is treated as permanent.
                print(f"Error calling API: {response.status_code}")
                print(f"Response: {response.text}")
                return {}
            print(f"Service temporarily unavailable (503). Retrying... (Attempt {attempt + 1}/{max_retries})")
            wait_seconds = retry_delay * (attempt + 1)  # Linear backoff
        except requests.exceptions.Timeout:
            print(f"Request timed out. Attempt {attempt + 1}/{max_retries}")
            wait_seconds = retry_delay
        except Exception as e:
            print(f"Exception while calling API: {str(e)}")
            wait_seconds = retry_delay

        # Only wait when another attempt will follow; sleeping after the
        # final attempt would just delay the failure result.
        if attempt < final_attempt:
            time.sleep(wait_seconds)

    return {}
def wake_servers(progress=gr.Progress()):
    """Send wake-up requests to all endpoints in parallel with status updates.

    This is a generator: it first yields an HTML list with every endpoint
    marked "Pending...", then yields a refreshed list each time one
    endpoint's wake-up task finishes, and finally yields the complete list.

    Args:
        progress: Progress callback, invoked as ``progress(fraction, desc=...)``.

    Yields:
        HTML string describing the current wake-up status of every endpoint.
    """
    # Shared status board: endpoint name -> human-readable status text.
    # Each worker thread only writes its own key.
    results = {name: "Pending..." for name in ENDPOINTS}

    def update_status_html():
        """Generate HTML for current status"""
        html = "<h3>Server Wake-up Results:</h3><ul>"
        for name, status in results.items():
            if "Status: 200" in status:
                status_color = "green"
            elif "Error" in status or "Failed" in status:
                status_color = "red"
            else:
                status_color = "gray"
            html += f"<li><strong>{name}</strong>: <span style='color:{status_color}'>{status}</span></li>"
        html += "</ul>"
        return html

    # Initial status HTML (everything pending)
    yield update_status_html()

    def try_wake_endpoint(name, url):
        """Helper function to wake endpoint with retry logic"""
        max_attempts = 30
        retry_delay = 10  # Seconds to wait between attempts

        for attempt in range(1, max_attempts + 1):
            is_last_attempt = attempt == max_attempts
            # Update status to show attempt
            results[name] = f"Attempting to connect... (try {attempt}/{max_attempts})"
            try:
                # Send a small payload just to wake up the server
                response = requests.post(
                    url,
                    json={"inputs": "Hello"},
                    headers={"Authorization": f"Bearer {HF_TOKEN}"},
                    timeout=45
                )
            except Exception as e:
                if is_last_attempt:
                    # Out of attempts: record the final failure instead of
                    # leaving a stale "retrying" message behind.
                    results[name] = f"Error: {str(e)} (Failed after {max_attempts} attempts)"
                    return
                results[name] = f"Error connecting, retrying in {retry_delay}s... (attempt {attempt}/{max_attempts})"
            else:
                if response.status_code == 200:
                    results[name] = f"Status: {response.status_code}"
                    return
                if is_last_attempt:
                    results[name] = f"Status: {response.status_code} (Failed after {max_attempts} attempts)"
                    return
                results[name] = f"Status: {response.status_code}, retrying in {retry_delay}s... (attempt {attempt}/{max_attempts})"
            time.sleep(retry_delay)

    # Function to process a single endpoint and produce the refreshed HTML
    def process_endpoint(name, url):
        # Let unexpected errors propagate to future.result() so the loop
        # below can record them, rather than hiding them behind a
        # ``return`` inside ``finally``.
        try_wake_endpoint(name, url)
        return update_status_html()

    # Create a thread pool to wake up servers in parallel
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Start all tasks
        futures = {
            executor.submit(process_endpoint, name, url): name
            for name, url in ENDPOINTS.items()
        }
        # Process results as they complete
        for future in concurrent.futures.as_completed(futures):
            name = futures[future]
            try:
                # Get the updated status HTML
                status_html = future.result()
                # Progress bar tracks the share of endpoints that answered 200
                awake = sum(1 for r in results.values() if "Status: 200" in r)
                processed = sum(1 for r in results.values() if r != 'Pending...')
                progress(awake / len(ENDPOINTS),
                         desc=f"Waking up servers ({processed} of {len(ENDPOINTS)} processed)")
                # Yield the updated status so the caller can display it
                yield status_html
            except Exception as e:
                print(f"Error processing {name}: {str(e)}")
                results[name] = "Error: Internal processing error"
                yield update_status_html()

    # Final update after all are complete
    progress(1.0, desc="Complete!")
    yield update_status_html()
def process_job_description(job_description: str) -> Dict:
    """Extract skills from a job description via the "job" endpoint.

    Args:
        job_description: Raw job description text, sent as ``inputs``.

    Returns:
        The endpoint response with a ``"text"`` field added to every skill
        (copied from ``"name"``, or ``"Unknown Skill"`` when absent) and a
        ``"total_skills"`` count. When the call fails or the response has no
        ``"skills"`` key, ``{"skills": [], "total_skills": 0}`` is returned.
    """
    response = call_api(ENDPOINTS["job"], {"inputs": job_description})

    # Failed call and malformed response share the same empty fallback.
    if not response or "skills" not in response:
        return {"skills": [], "total_skills": 0}

    found_skills = response["skills"]
    for entry in found_skills:
        # Downstream rendering reads "text"; mirror "name" into it.
        entry["text"] = entry.get("name", "Unknown Skill")
    response["total_skills"] = len(found_skills)
    return response
def process_skill_quality(text: str) -> Dict:
    """Score one sentence with the "skill_quality" endpoint.

    Args:
        text: Sentence to classify, sent as ``inputs``.

    Returns:
        The endpoint response, or an all-negative score
        (``leadership``/``collaboration`` 0 with ``"No"`` tokens) when the
        call yields nothing.
    """
    scores = call_api(ENDPOINTS["skill_quality"], {"inputs": text})
    if scores:
        return scores
    # Neutral fallback so callers can always read the token fields.
    return {"leadership": 0, "leadership_token": "No", "collaboration": 0, "collaboration_token": "No"}
def process_skill_quality_batch(sentences):
    """Process multiple sentences through the skill quality endpoint concurrently.

    All sentences are submitted to a thread pool at once; results are then
    collected in the same order as ``sentences`` so the output is
    deterministic and lines up index-for-index with the input.

    Args:
        sentences: Iterable of sentences to score.

    Returns:
        A list with one dict per sentence containing ``"sentence"``,
        ``"is_leadership"``, ``"is_collaboration"`` and ``"raw_score"``.
        A sentence is flagged as collaboration only when it is not already
        flagged as leadership. Sentences whose scoring raised get both flags
        ``False`` and an all-negative ``raw_score``.
    """
    results = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Submit everything first so the requests run concurrently, but keep
        # the (sentence, future) pairs in input order for collection.
        submitted = [
            (sentence, executor.submit(process_skill_quality, sentence))
            for sentence in sentences
        ]
        for sentence, future in submitted:
            try:
                quality_score = future.result()
                is_leadership = quality_score["leadership_token"] == "Yes"
                is_collaboration = not is_leadership and quality_score["collaboration_token"] == "Yes"
                results.append({
                    "sentence": sentence,
                    "is_leadership": is_leadership,
                    "is_collaboration": is_collaboration,
                    "raw_score": quality_score
                })
            except Exception as e:
                # str() keeps the log line itself from failing on
                # non-string input.
                print(f"Error processing sentence: {str(sentence)[:30]}... - {str(e)}")
                results.append({
                    "sentence": sentence,
                    "is_leadership": False,
                    "is_collaboration": False,
                    "raw_score": {"leadership": 0, "leadership_token": "No", "collaboration": 0, "collaboration_token": "No"}
                })
    return results
def process_single_resume(file_path, job_skills, progress=None, resume_index=0, total_resumes=1, resume_status=None):
    """Process a single resume file.

    Reads the file as UTF-8 text and hands its content to ``process_resume``.
    Failures are reported through the return value and ``resume_status``
    rather than raised.

    Args:
        file_path: Path of the resume file to read.
        job_skills: Job skills, forwarded to ``process_resume``.
        progress: Optional callback invoked as ``progress(fraction, desc=...)``.
        resume_index: Zero-based position of this resume in the batch.
        total_resumes: Number of resumes in the batch; this resume is given
            the slice of the 0.4-0.9 progress range matching its position.
        resume_status: Optional shared mapping of resume name -> status dict,
            updated in place.

    Returns:
        A ``(result, resume_name)`` tuple. On any error ``result`` is
        ``{"skills": [], "total_skills": 0}``.
    """
    progress_base = 0.4 + (0.5 * resume_index / total_resumes)
    progress_cap = 0.4 + (0.5 * (resume_index + 1) / total_resumes)
    resume_name = os.path.basename(file_path)

    def new_status_entry():
        # Fresh dict per call so entries are never shared between resumes.
        return {"progress": 0, "status": "Starting analysis...", "sentences_processed": 0, "total_sentences": 0}

    # Create the status entry before reporting progress, so a failing
    # progress callback cannot leave this resume without an entry.
    if resume_status is not None:
        try:
            resume_status[resume_name] = new_status_entry()
            print(f"Initialized status for {resume_name}")
        except Exception as e:
            print(f"Error initializing status for {resume_name}: {str(e)}")

    if progress is not None:
        try:
            progress(progress_base, desc=f"Processing resume {resume_index+1}/{total_resumes}: {resume_name}...")
        except Exception as e:
            # Progress reporting is best-effort and must not abort processing.
            print(f"Error initializing status for {resume_name}: {str(e)}")

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            resume_content = f.read()
        print(f"Starting processing of {resume_name}")
        result = process_resume(resume_content, job_skills,
                                progress=progress, progress_base=progress_base, progress_cap=progress_cap,
                                resume_status=resume_status, resume_name=resume_name)
        print(f"Finished processing {resume_name}")
        # Return both the result and the filename
        return result, resume_name
    except Exception as e:
        print(f"Error processing {resume_name}: {str(e)}")
        if resume_status is not None:
            # setdefault: the entry may be missing if initialisation failed;
            # recording the error must not raise a KeyError of its own.
            entry = resume_status.setdefault(resume_name, new_status_entry())
            entry["status"] = f"Error: {str(e)}"
        return {"skills": [], "total_skills": 0}, resume_name
def update_resume_status_html(resume_status):
    """Generate HTML table for resume processing status.

    Args:
        resume_status: Mapping of resume name -> status dict. Each status
            dict is read for ``"status"``, ``"sentences_processed"`` and
            ``"total_sentences"``; missing keys are treated as empty / 0.

    Returns:
        An HTML string with a heading and one table row per resume showing
        the processed/total sentence counts and the status text. The status
        is coloured green at 100%, orange when partially done, gray otherwise.
    """
    # Function-scope import: resume names come from uploaded files and the
    # status text can carry exception messages, so both are escaped.
    from html import escape

    parts = [
        "<h3>Resume Processing Status:</h3>",
        "<table class='resume-status-table' style='width: 100%; border-collapse: collapse; margin-bottom: 20px;'>",
        "<tr style='background-color: #eee;'><th style='padding: 8px; text-align: left; border: 1px solid #ddd;'>Resume</th>",
        "<th style='padding: 8px; text-align: center; border: 1px solid #ddd;'>Progress</th>",
        "<th style='padding: 8px; text-align: center; border: 1px solid #ddd;'>Status</th></tr>",
    ]

    # Print the current status to aid debugging
    print(f"Current resume status: {resume_status}")

    for resume_name, status in resume_status.items():
        processed = status.get("sentences_processed", 0)
        total = status.get("total_sentences", 0)
        # Guard against division by zero before the sentence count is known.
        progress_pct = round((processed / total) * 100) if total > 0 else 0
        progress_text = f"{processed}/{total} sentences ({progress_pct}%)"

        # Color based on progress
        if progress_pct == 100:
            color = "green"
        elif progress_pct > 0:
            color = "orange"
        else:
            color = "gray"

        safe_name = escape(str(resume_name))
        safe_status = escape(str(status.get("status", "")))
        parts.append(f"<tr><td style='padding: 8px; border: 1px solid #ddd;'>{safe_name}</td>")
        parts.append(f"<td style='padding: 8px; text-align: center; border: 1px solid #ddd;'>{progress_text}</td>")
        parts.append(f"<td style='padding: 8px; text-align: center; border: 1px solid #ddd; color: {color};'>{safe_status}</td></tr>")

    parts.append("</table>")
    return "".join(parts)
def process_resume(resume_text: str, job_skills: List[str], progress=None, progress_base=0.4, progress_cap=0.9, resume_status=None, resume_name=None) -> Dict:
    """Extract roles from a resume and score each role's description sentences.

    The resume text is sent to the "resume" endpoint; the response is
    iterated as a sequence of role dicts. Each role's ``"skills"`` get a
    ``"text"`` field, and each non-empty ``"description"`` is scored with
    ``process_skill_quality_batch`` and stored under ``"quality_scores"``.

    Args:
        resume_text: Raw resume text, sent as ``inputs``.
        job_skills: Accepted for interface compatibility; not read here.
        progress, progress_base, progress_cap: Accepted for interface
            compatibility; not read here.
        resume_status: Optional shared mapping of resume name -> status dict.
            When given together with ``resume_name``, the entry for this
            resume is updated as processing advances. Those updates are
            best-effort: errors are printed and ignored.
        resume_name: Key of this resume inside ``resume_status``.

    Returns:
        ``{"skills": [...], "total_skills": int, "roles": <endpoint response>}``.

    Raises:
        Exception: When the endpoint call returns nothing.
    """
    tracking = resume_status is not None and resume_name is not None

    if tracking:
        try:
            resume_status[resume_name]["status"] = "Extracting roles..."
            print(f"Updating status for {resume_name}: Extracting roles...")
        except Exception as e:
            print(f"Error updating initial status for {resume_name}: {str(e)}")

    # Call the API (longer retry budget than the default)
    roles = call_api(ENDPOINTS["resume"], {"inputs": resume_text}, max_retries=10, retry_delay=10)

    if not roles:
        # Record the failure, then signal it to the caller.
        if tracking:
            entry = resume_status[resume_name]
            entry["status"] = "Error: Failed to extract roles"
            entry["failed"] = True  # Mark as failed
        raise Exception("Failed to extract roles from resume")

    # Total sentence count up front, used as the progress denominator
    total_sentences = sum(
        len(role.get("description", []))
        for role in roles
        if "description" in role
    )

    try:
        if tracking:
            resume_status[resume_name]["total_sentences"] = total_sentences
            print(f"Total sentences for {resume_name}: {total_sentences}")
    except Exception as e:
        print(f"Error updating total sentences for {resume_name}: {str(e)}")

    all_skills = []
    processed_sentences = 0

    for role in roles:
        # Collect skills across every role
        if "skills" in role:
            for skill in role["skills"]:
                # Add a "text" field for compatibility
                skill["text"] = skill.get("name", "Unknown Skill")
                all_skills.append(skill)

        # Roles without description sentences have nothing to score
        if "description" not in role or not role["description"]:
            continue

        sentences = role.get("description", [])
        quality_scores = process_skill_quality_batch(sentences)
        processed_sentences += len(sentences)

        try:
            if tracking:
                entry = resume_status[resume_name]
                entry["sentences_processed"] = processed_sentences
                entry["progress"] = round(processed_sentences/total_sentences*100)
                entry["status"] = f"Analyzing skill quality... ({processed_sentences}/{total_sentences})"
                print(f"Updated {resume_name} progress: {processed_sentences}/{total_sentences} sentences")
        except Exception as e:
            print(f"Error updating progress for {resume_name}: {str(e)}")

        role["quality_scores"] = quality_scores

    # Mark the resume as complete unless it was flagged as failed
    try:
        if tracking:
            entry = resume_status[resume_name]
            if not entry.get("failed", False):
                entry["status"] = "Analysis complete"
                entry["sentences_processed"] = total_sentences
                entry["progress"] = 100
                print(f"Completed analysis for {resume_name}")
    except Exception as e:
        print(f"Error updating final status for {resume_name}: {str(e)}")

    # Shape the result like the job-description output, keeping the raw roles
    return {
        "skills": all_skills,
        "total_skills": len(all_skills),
        "roles": roles  # Keep the original roles data
    }
# Create a helper function to format years of experience in a readable format | |
def format_years_of_experience(years):
    """Format years as a combination of years and months.

    The value is rounded to whole months first and then split, so a
    fraction that rounds up to twelve months carries into the year count
    (1.99 -> "2y", not "1y 12m").

    Args:
        years: Duration in years (int or float).

    Returns:
        ``"<Y>y <M>m"``, ``"<Y>y"`` or ``"<M>m"`` depending on which parts
        are non-zero, and ``"0"`` for durations that round to zero months
        or are negative.
    """
    total_months = int(round(years * 12))
    if total_months <= 0:
        return "0"

    full_years, months = divmod(total_months, 12)
    if full_years and months:
        return f"{full_years}y {months}m"
    if full_years:
        return f"{full_years}y"
    return f"{months}m"
def create_html_output(job_result: Dict, resume_results: List[Dict], filenames: List[str] = None) -> str: | |
"""Create HTML output for the interface""" | |
html = "<div style='font-family: Arial, sans-serif;'>" | |
# Remove the global script since it's not working | |
# Set default filenames if not provided | |
if not filenames: | |
filenames = [f"Resume {i}" for i in range(1, len(resume_results) + 1)] | |
# Job Description Section | |
html += "<h2>Job Description Analysis</h2>" | |
html += f"<p><strong>Total Skills Found:</strong> {job_result['total_skills']}</p>" | |
html += "<p><strong>Skills:</strong></p>" | |
html += "<div style='background-color: #f0f0f0; padding: 10px; border-radius: 5px;'>" | |
for skill in job_result['skills']: | |
html += f"<span style='background-color: #e0e0e0; padding: 2px 5px; margin: 2px; border-radius: 3px; display: inline-block;'>{skill['text']}</span>" | |
html += "</div>" | |
# Get job skills for matching | |
job_skills = [skill['text'].lower() for skill in job_result['skills']] | |
# Resume Analysis Section | |
html += "<h2>Resume Analysis</h2>" | |
# Check if we have multiple resumes to display summary table | |
multiple_resumes = len(resume_results) > 1 | |
# Calculate leadership and collaboration counts, and total experience for each resume | |
leadership_counts = [] | |
collaboration_counts = [] | |
total_experiences = [] | |
skill_experience_maps = [] | |
skill_leadership_maps = [] # New map for tracking leadership skills | |
skill_collaboration_maps = [] # New map for tracking collaboration skills | |
for resume_result in resume_results: | |
# Count leadership and collaboration sentences | |
leadership_count = 0 | |
collaboration_count = 0 | |
# Calculate total experience | |
total_experience = 0 | |
skill_experience = {} | |
skill_leadership = {} # Track which skills have leadership statements | |
skill_collaboration = {} # Track which skills have collaboration statements | |
if 'roles' in resume_result: | |
for role in resume_result['roles']: | |
# Count quality scores and track skills with leadership/collaboration | |
if 'quality_scores' in role: | |
for score in role['quality_scores']: | |
if score['is_leadership']: | |
leadership_count += 1 | |
# Extract skills from leadership statement | |
for skill in role.get('skills', []): | |
skill_name = skill.get('name', '').lower() | |
if skill_name: | |
skill_leadership[skill_name] = True | |
elif score['is_collaboration']: | |
collaboration_count += 1 | |
# Extract skills from collaboration statement | |
for skill in role.get('skills', []): | |
skill_name = skill.get('name', '').lower() | |
if skill_name: | |
skill_collaboration[skill_name] = True | |
# Calculate experience duration | |
if 'role_length' in role: | |
# Convert months to years (role_length is in months) | |
years_in_role = role['role_length'] / 12 | |
total_experience += years_in_role | |
# Calculate experience per skill | |
for skill in role.get('skills', []): | |
skill_name = skill.get('name', '').lower() | |
if skill_name: | |
if skill_name in skill_experience: | |
skill_experience[skill_name] += years_in_role | |
else: | |
skill_experience[skill_name] = years_in_role | |
elif 'dates' in role and role['dates']: | |
# Fallback to old method if role_length is not available | |
start_date = role['dates'].get('date_started', '') | |
end_date = role['dates'].get('date_ended', '') | |
try: | |
# Try to extract years from dates | |
start_year = int(''.join(filter(str.isdigit, start_date[-4:]))) if start_date else 0 | |
end_year = int(''.join(filter(str.isdigit, end_date[-4:]))) if end_date and end_date.lower() != 'present' else time.localtime().tm_year | |
years_in_role = max(0, end_year - start_year) | |
total_experience += years_in_role | |
# Calculate experience per skill | |
for skill in role.get('skills', []): | |
skill_name = skill.get('name', '').lower() | |
if skill_name: | |
if skill_name in skill_experience: | |
skill_experience[skill_name] += years_in_role | |
else: | |
skill_experience[skill_name] = years_in_role | |
except: | |
# Skip if date parsing fails | |
pass | |
leadership_counts.append(leadership_count) | |
collaboration_counts.append(collaboration_count) | |
total_experiences.append(total_experience) | |
skill_experience_maps.append(skill_experience) | |
skill_leadership_maps.append(skill_leadership) | |
skill_collaboration_maps.append(skill_collaboration) | |
# Calculate averages for leadership and collaboration | |
avg_leadership = sum(leadership_counts) / len(leadership_counts) if leadership_counts else 0 | |
avg_collaboration = sum(collaboration_counts) / len(collaboration_counts) if collaboration_counts else 0 | |
# Create summary table if multiple resumes | |
if multiple_resumes: | |
html += "<h3>Match Summary</h3>" | |
html += "<table style='width: 100%; border-collapse: collapse; margin-bottom: 20px;'>" | |
html += "<tr style='background-color: #eee;'>" | |
html += "<th style='padding: 8px; text-align: left; border: 1px solid #ddd;'>JOB ID</th>" | |
html += "<th style='padding: 8px; text-align: left; border: 1px solid #ddd;'>CANDIDATE</th>" | |
html += "<th style='padding: 8px; text-align: center; border: 1px solid #ddd;'>% MATCHED SKILLS</th>" | |
html += "<th style='padding: 8px; text-align: center; border: 1px solid #ddd;'>SKILL</th>" | |
html += "<th style='padding: 8px; text-align: center; border: 1px solid #ddd;'>YEARS OF EXPERIENCE</th>" | |
html += "<th style='padding: 8px; text-align: center; border: 1px solid #ddd;'>MATCH CATEGORY</th>" | |
html += "<th style='padding: 8px; text-align: center; border: 1px solid #ddd;'>LEADERSHIP QUALITY</th>" | |
html += "<th style='padding: 8px; text-align: center; border: 1px solid #ddd;'>COLLABORATION QUALITY</th>" | |
html += "<th style='padding: 8px; text-align: center; border: 1px solid #ddd;'>ACTIONS</th></tr>" | |
# Create a list of resume data for sorting | |
resume_data = [] | |
for i, resume_result in enumerate(resume_results, 1): | |
# Get primary skills (Java and React in this case, as per image) | |
primary_skills = {} | |
skill_exp = {} # Create a new dict to aggregate experience | |
# First, aggregate all experience for each skill | |
for skill_name, years in skill_experience_maps[i-1].items(): | |
skill_name_lower = skill_name.lower() | |
if skill_name_lower in skill_exp: | |
skill_exp[skill_name_lower] = max(skill_exp[skill_name_lower], years) # Take the max experience | |
else: | |
skill_exp[skill_name_lower] = years | |
# Get unique resume skills | |
resume_skills = set(skill['text'].lower() for skill in resume_result['skills']) | |
# Only include skills that are in both the resume and job requirements | |
matched_skills = [] | |
for skill in job_skills: | |
if skill.lower() in resume_skills: | |
matched_skills.append(skill) | |
if skill.lower() in skill_exp: | |
primary_skills[skill] = skill_exp[skill.lower()] | |
# Calculate skill match | |
# resume_skills = [skill['text'].lower() for skill in resume_result['skills']] | |
# matched_skills = [skill for skill in resume_skills if skill in job_skills] | |
match_count = len(matched_skills) | |
match_percentage = round((match_count / job_result['total_skills'] * 100) if job_result['total_skills'] > 0 else 0, 1) | |
# Get leadership and collaboration counts | |
leadership_count = leadership_counts[i-1] | |
collaboration_count = collaboration_counts[i-1] | |
# Get total experience and determine category | |
total_experience = total_experiences[i-1] | |
# Determine leadership quality (YES/NO) | |
leadership_quality = "YES" if leadership_count > avg_leadership * 1.2 else "NO" | |
# Determine thoroughness quality (YES/NO) | |
collaboration_quality = "YES" if collaboration_count > avg_collaboration * 1.2 else "NO" | |
# Determine match category (Strong, Close, Weak) | |
if match_percentage >= 80: | |
match_category = "Strong Match" | |
if leadership_quality == "YES" and collaboration_quality == "YES": | |
match_category = "Strong Quality Match (Leadership and Collaboration)" | |
elif leadership_quality == "YES": | |
match_category = "Strong Quality Match (Leadership)" | |
elif collaboration_quality == "YES": | |
match_category = "Strong Quality Match (Collaboration)" | |
elif match_percentage >= 50: | |
match_category = "Close Match" | |
if leadership_quality == "YES" and collaboration_quality == "YES": | |
match_category = "Close Quality Match (Leadership and Collaboration)" | |
elif leadership_quality == "YES": | |
match_category = "Close Quality Match (Leadership)" | |
elif collaboration_quality == "YES": | |
match_category = "Close Match (Collaboration)" | |
else: | |
match_category = "Weak Match" | |
if leadership_quality == "YES" and collaboration_quality == "YES": | |
match_category = "Weak Quality Match (Leadership and Collaboration)" | |
elif leadership_quality == "YES": | |
match_category = "Weak Quality Match (Leadership)" | |
elif collaboration_quality == "YES": | |
match_category = "Weak Quality Match (Collaboration)" | |
# Add to resume data list for sorting | |
resume_data.append({ | |
'job_id': "JD-1", | |
'index': i, | |
'match_percentage': match_percentage, | |
'matched_skills': matched_skills, | |
'primary_skills': primary_skills, | |
'match_category': match_category, | |
'leadership_quality': leadership_quality, | |
'collaboration_quality': collaboration_quality, | |
'leadership_count': leadership_count, | |
'collaboration_count': collaboration_count, | |
'filename': filenames[i-1] | |
}) | |
# Sort resumes by match percentage (highest first) | |
sorted_resumes = sorted(resume_data, key=lambda x: -x['match_percentage']) | |
# Add rows to summary table | |
for resume_data in sorted_resumes: | |
# Generate table row for each candidate | |
file_name = resume_data['filename'] # Convert 1,2,3 to A,B,C | |
# Create a single row per candidate | |
html += "<tr>" | |
# Job ID | |
html += f"<td style='padding: 8px; border: 1px solid #ddd;'>{resume_data['job_id']}</td>" | |
# Candidate | |
html += f"<td style='padding: 8px; border: 1px solid #ddd;'>{file_name}</td>" | |
# Match Percentage | |
html += f"<td style='padding: 8px; text-align: center; border: 1px solid #ddd;'>{resume_data['match_percentage']}%</td>" | |
# Skills and Experience | |
if resume_data['matched_skills']: | |
# Show the first matched skill and its experience | |
skill = resume_data['matched_skills'][0] | |
html += f"<td style='padding: 8px; text-align: center; border: 1px solid #ddd;'>{skill}</td>" | |
html += f"<td style='padding: 8px; text-align: center; border: 1px solid #ddd;'>{format_years_of_experience(resume_data['primary_skills'].get(skill, 0))}</td>" | |
else: | |
# Show dashes for no matches | |
html += "<td style='padding: 8px; text-align: center; border: 1px solid #ddd;'>-</td>" | |
html += "<td style='padding: 8px; text-align: center; border: 1px solid #ddd;'>-</td>" | |
# Match Category | |
html += f"<td style='padding: 8px; text-align: center; border: 1px solid #ddd;'>{resume_data['match_category']}</td>" | |
# Leadership and Collaboration Quality with counts specific to this resume | |
leadership_color = "#e6ffe6" if resume_data['leadership_quality'] == "YES" else "#f5f5f5" | |
collaboration_color = "#e6ffe6" if resume_data['collaboration_quality'] == "YES" else "#f5f5f5" | |
html += f"<td style='padding: 8px; text-align: center; border: 1px solid #ddd; background-color: {leadership_color};'>Leadership: {resume_data['leadership_quality']} ({resume_data['leadership_count']})</td>" | |
html += f"<td style='padding: 8px; text-align: center; border: 1px solid #ddd; background-color: {collaboration_color};'>Collaboration: {resume_data['collaboration_quality']} ({resume_data['collaboration_count']})</td>" | |
# Actions | |
html += f"<td style='padding: 8px; text-align: center; border: 1px solid #ddd;'>" | |
html += f"<button onclick=\"const allDetails = document.querySelectorAll('.resume-detail'); allDetails.forEach(detail => {{ if(detail.id !== 'resume-detail-{resume_data['index']}') detail.style.display = 'none'; }}); document.getElementById('resume-detail-{resume_data['index']}').style.display = document.getElementById('resume-detail-{resume_data['index']}').style.display === 'none' ? 'block' : 'none';\" style='background-color: #4CAF50; color: white; padding: 6px 12px; border: none; border-radius: 4px; cursor: pointer; font-size: 14px;'>Toggle Details</button>" | |
html += "</td>" | |
html += "</tr>" | |
# If there are additional matched skills, add them in subsequent rows | |
if len(resume_data['matched_skills']) > 1: | |
for skill in resume_data['matched_skills'][1:]: | |
html += "<tr>" | |
# Empty cells for the first three columns | |
html += "<td style='padding: 8px; border: 1px solid #ddd;'></td>" | |
html += "<td style='padding: 8px; border: 1px solid #ddd;'></td>" | |
html += "<td style='padding: 8px; border: 1px solid #ddd;'></td>" | |
# Skill and Experience | |
html += f"<td style='padding: 8px; text-align: center; border: 1px solid #ddd;'>{skill}</td>" | |
html += f"<td style='padding: 8px; text-align: center; border: 1px solid #ddd;'>{format_years_of_experience(resume_data['primary_skills'].get(skill, 0))}</td>" | |
# Empty cells for the remaining columns | |
html += "<td style='padding: 8px; border: 1px solid #ddd;'></td>" | |
html += "<td style='padding: 8px; border: 1px solid #ddd;'></td>" | |
html += "<td style='padding: 8px; border: 1px solid #ddd;'></td>" | |
html += "<td style='padding: 8px; border: 1px solid #ddd;'></td>" | |
html += "</tr>" | |
html += "</table>" | |
# Add individual resume sections | |
for i, resume_result in enumerate(resume_results, 1): | |
# For multiple resumes, make detailed view hidden by default | |
display_style = "none" if multiple_resumes else "block" | |
# If single resume, show matched skills count | |
if not multiple_resumes: | |
resume_skills = [skill['text'].lower() for skill in resume_result['skills']] | |
matched_skills = [skill for skill in resume_skills if skill in job_skills] | |
match_count = len(matched_skills) | |
match_percentage = round((match_count / job_result['total_skills'] * 100) if job_result['total_skills'] > 0 else 0, 1) | |
html += f"<p><strong>Skills Matched:</strong> {match_count}/{job_result['total_skills']} ({match_percentage}%)</p>" | |
# Show leadership and collaboration counts | |
leadership_count = leadership_counts[i-1] | |
collaboration_count = collaboration_counts[i-1] | |
total_experience = total_experiences[i-1] | |
if total_experience < 3: | |
category = "Entry" | |
elif total_experience < 5: | |
category = "Intermediate" | |
else: | |
category = "Advanced" | |
# Add quality modifier if 50% above average | |
if leadership_count > avg_leadership * 1.5: | |
category = f"Quality {category} (Leadership)" | |
elif collaboration_count > avg_collaboration * 1.5: | |
category = f"Quality {category} (Collaboration)" | |
html += f"<p><strong>Leadership Sentences:</strong> {leadership_count}</p>" | |
html += f"<p><strong>Collaboration Sentences:</strong> {collaboration_count}</p>" | |
html += f"<p><strong>Total Years of Experience:</strong> {format_years_of_experience(total_experience)}</p>" | |
html += f"<p><strong>Category:</strong> {category}</p>" | |
# Get the filename for this resume | |
resume_file = filenames[i-1] | |
# Detailed resume section with visibility control | |
html += f"<div id='resume-detail-{i}' class='resume-detail' style='margin-bottom: 20px; padding: 10px; border: 1px solid #ddd; border-radius: 5px; display: {display_style};'>" | |
html += f"<h3>{resume_file} Details</h3>" | |
# Skills Summary specific to this resume - Now inside the detail section | |
html += "<h3>Skills Summary</h3>" | |
html += "<div style='margin-bottom: 10px;'>" | |
html += f"""<button id='skillsToggleBtn{i}' style='background-color: #4CAF50; color: white; padding: 8px 16px; border: none; border-radius: 4px; cursor: pointer;' | |
onclick=" | |
var button = document.getElementById('skillsToggleBtn{i}'); | |
var rows = document.querySelectorAll('.skill-row{i}'); | |
var showOnlyMatched = button.textContent.includes('Show All'); | |
for (var j = 0; j < rows.length; j++) {{ | |
if (showOnlyMatched) {{ | |
rows[j].style.display = ''; | |
}} else {{ | |
rows[j].style.display = rows[j].getAttribute('data-match') === 'true' ? '' : 'none'; | |
}} | |
}} | |
button.textContent = showOnlyMatched ? 'Show Only Matched Skills' : 'Show All Skills'; | |
" | |
>Show All Skills</button>""" | |
html += "</div>" | |
html += f"<table style='width: 100%; border-collapse: collapse; margin-bottom: 20px;' id='skillsTable{i}'>" | |
html += "<tr style='background-color: #eee;'><th style='padding: 8px; text-align: left; border: 1px solid #ddd;'>Skill</th>" | |
html += "<th style='padding: 8px; text-align: center; border: 1px solid #ddd;'>Years of Experience</th>" | |
html += "<th style='padding: 8px; text-align: center; border: 1px solid #ddd;'>Leadership Quality Count</th>" | |
html += "<th style='padding: 8px; text-align: center; border: 1px solid #ddd;'>Collaboration Quality Count</th>" | |
html += "<th style='padding: 8px; text-align: center; border: 1px solid #ddd;'>Match</th></tr>" | |
# Get all unique skills for this specific resume | |
resume_skills = set() | |
for skill in resume_result['skills']: | |
resume_skills.add(skill['text'].lower()) | |
# Get skill maps for this resume | |
skill_experience = skill_experience_maps[i-1] | |
skill_leadership = skill_leadership_maps[i-1] | |
skill_collaboration = skill_collaboration_maps[i-1] | |
# Create a list of skill data for sorting | |
skill_data = [] | |
added_skills = set() # Track which skills we've already added | |
# Count quality statements per skill | |
skill_leadership_counts = {} | |
skill_collaboration_counts = {} | |
if 'roles' in resume_result: | |
for role in resume_result['roles']: | |
if 'quality_scores' in role: | |
role_skills = {skill['name'].lower() for skill in role.get('skills', [])} | |
for score in role['quality_scores']: | |
for skill in role_skills: | |
if score['is_leadership']: | |
skill_leadership_counts[skill] = skill_leadership_counts.get(skill, 0) + 1 | |
elif score['is_collaboration']: | |
skill_collaboration_counts[skill] = skill_collaboration_counts.get(skill, 0) + 1 | |
for skill in resume_skills: | |
# Only add if we haven't seen this skill before | |
if skill not in added_skills: | |
# Get years of experience for this skill | |
years = skill_experience.get(skill, 0) | |
# Get quality counts for this skill | |
leadership_count = skill_leadership_counts.get(skill, 0) | |
collaboration_count = skill_collaboration_counts.get(skill, 0) | |
# Check if skill matches job requirements | |
is_match = skill in job_skills | |
skill_data.append({ | |
'skill': skill, | |
'years': years, | |
'leadership_count': leadership_count, | |
'collaboration_count': collaboration_count, | |
'is_match': is_match | |
}) | |
added_skills.add(skill) | |
# Sort skills by years of experience (descending) | |
skill_data.sort(key=lambda x: (-x['years'], x['skill'])) | |
# Add rows for each skill | |
for data in skill_data: | |
# Set initial display based on match (only show matched skills by default) | |
display = "none" if not data['is_match'] else "" | |
# Add row to summary table | |
html += f"<tr class='skill-row{i}' data-match='{str(data['is_match']).lower()}' style='display: {display};'>" | |
html += f"<td style='padding: 8px; border: 1px solid #ddd;'>{data['skill']}</td>" | |
html += f"<td style='padding: 8px; text-align: center; border: 1px solid #ddd;'>{format_years_of_experience(data['years'])}</td>" | |
html += f"<td style='padding: 8px; text-align: center; border: 1px solid #ddd;'>{data['leadership_count']}</td>" | |
html += f"<td style='padding: 8px; text-align: center; border: 1px solid #ddd;'>{data['collaboration_count']}</td>" | |
html += f"<td style='padding: 8px; text-align: center; border: 1px solid #ddd; color: {'green' if data['is_match'] else 'red'};'>{'Yes' if data['is_match'] else 'No'}</td></tr>" | |
html += "</table>" | |
# Display all skills found in the resume | |
html += "<p><strong>Skills Found:</strong></p>" | |
html += "<div style='background-color: #f0f0f0; padding: 10px; border-radius: 5px;'>" | |
# Keep track of skills we've already added | |
added_skills = set() | |
for skill in resume_result['skills']: | |
skill_text = skill['text'].lower() | |
# Only add if we haven't seen this skill before | |
if skill_text not in added_skills: | |
# Highlight matched skills | |
is_match = skill_text in job_skills | |
bg_color = "#c8e6c9" if is_match else "#e0e0e0" # Green tint for matches | |
# Add years of experience for this skill if available | |
skill_years = skill_experience.get(skill_text, 0) | |
experience_text = f" ({format_years_of_experience(skill_years)})" if skill_years > 0 else "" | |
html += f"<span style='background-color: {bg_color}; padding: 2px 5px; margin: 2px; border-radius: 3px; display: inline-block;'>{skill['text']}{experience_text}</span>" | |
added_skills.add(skill_text) | |
html += "</div>" | |
# Job roles section | |
if 'roles' in resume_result and resume_result['roles']: | |
html += "<p><strong>Job Experience:</strong></p>" | |
for role in resume_result['roles']: | |
html += f"<div style='margin: 10px 0; padding: 10px; background-color: #f9f9f9; border-radius: 5px;'>" | |
html += f"<p><strong>Title:</strong> {' '.join(role.get('title', ['Unknown']))}</p>" | |
if 'dates' in role and role['dates']: | |
html += f"<p><strong>Period:</strong> {role['dates'].get('date_started', 'Unknown')} to {role['dates'].get('date_ended', 'Unknown')}</p>" | |
if 'role_length' in role: | |
years = role['role_length'] / 12 | |
months = role['role_length'] % 12 | |
duration_text = "" | |
if years >= 1: | |
duration_text += f"{int(years)} year{'s' if int(years) > 1 else ''}" | |
if months > 0: | |
if duration_text: | |
duration_text += " and " | |
duration_text += f"{int(months)} month{'s' if int(months) > 1 else ''}" | |
html += f"<p><strong>Duration:</strong> {duration_text}</p>" | |
html += f"<p><strong>Role Skills:</strong></p>" | |
html += "<div style='margin-left: 20px;'>" | |
for skill in role.get('skills', []): | |
# Highlight matched skills in roles too | |
skill_name = skill.get('name', 'Unknown') | |
is_match = skill_name.lower() in job_skills | |
bg_color = "#c8e6c9" if is_match else "#e0e0e0" # Green tint for matches | |
html += f"<span style='background-color: {bg_color}; padding: 2px 5px; margin: 2px; border-radius: 3px; display: inline-block;'>{skill_name}</span>" | |
html += "</div>" | |
# Display skill quality analysis | |
if 'quality_scores' in role and role['quality_scores']: | |
html += "<p><strong>Skill Quality Analysis:</strong></p>" | |
html += "<table style='width: 100%; border-collapse: collapse; margin-top: 10px;'>" | |
html += "<tr style='background-color: #eee;'><th style='padding: 8px; text-align: left; border: 1px solid #ddd;'>Statement</th><th style='padding: 8px; text-align: center; border: 1px solid #ddd;'>Leadership</th><th style='padding: 8px; text-align: center; border: 1px solid #ddd;'>Collaboration</th></tr>" | |
for score in role['quality_scores']: | |
leadership_class = "green-text" if score['is_leadership'] else "red-text" | |
collab_class = "green-text" if score['is_collaboration'] else "red-text" | |
html += f"<tr><td style='padding: 8px; border: 1px solid #ddd;'>{score['sentence']}</td>" | |
html += f"<td style='padding: 8px; text-align: center; border: 1px solid #ddd; color: {'green' if score['is_leadership'] else 'red'};'>{'Yes' if score['is_leadership'] else 'No'}</td>" | |
html += f"<td style='padding: 8px; text-align: center; border: 1px solid #ddd; color: {'green' if score['is_collaboration'] else 'red'};'>{'Yes' if score['is_collaboration'] else 'No'}</td></tr>" | |
html += "</table>" | |
html += "</div>" | |
html += "</div>" | |
html += "</div>" | |
return html | |
# Create Gradio interface
# NOTE(review): the nesting of the layout containers below was reconstructed from a
# copy of this file that had lost its indentation -- confirm against the running app.
with gr.Blocks(title="Beyond Keywords: Resume Analysis System", js=js_func) as demo:
    gr.Markdown("# Beyond Keywords: Job Description and Resume Analyzer")
    gr.Markdown(f"Running on Commit: {sha[:7]}")
    gr.Markdown("Upload a job description and resume(s) to analyze skill matches and quality.")

    # Server wake-up section
    with gr.Row():
        wake_btn = gr.Button("Wake Servers (Do this first!) - Might take a 1-3 minutes for cold starts.")
        wake_status = gr.HTML(
            label="Server Status",
            value="<div style='color: #666;'>Click 'Wake Servers'<br>to initialize the system...</div>",
        )

    # Input section
    with gr.Row():
        with gr.Column():
            # NOTE(review): Gradio documents `lines` as an int; the fractional values
            # here and on the resume textbox look like visual tuning so both columns
            # end at the same height -- confirm before rounding them.
            job_description = gr.Textbox(
                label="Job Description",
                placeholder="Paste the job description here...",
                lines=13.10,
            )
        with gr.Column():
            resume_input = gr.Group()
            with resume_input:
                input_type = gr.Radio(
                    choices=["Paste Text", "Upload File(s)"],
                    label="Input Method",
                    value="Paste Text",
                )
                resume_text = gr.Textbox(
                    label="Resume Text",
                    placeholder="Paste the resume text here...",
                    lines=8.85,
                    visible=True,
                )
                resume_file = gr.Files(
                    label="Upload Resume(s) (.txt files)",
                    file_types=[".txt"],
                    visible=False,
                    interactive=True,
                    type="filepath",
                )

            def toggle_input(choice):
                """Show the resume widget matching the selected input method, hide the other."""
                return {
                    resume_text: gr.update(visible=choice == "Paste Text"),
                    resume_file: gr.update(visible=choice == "Upload File(s)"),
                }

            input_type.change(
                fn=toggle_input,
                inputs=input_type,
                outputs=[resume_text, resume_file],
            )

    submit_btn = gr.Button("Analyze", variant="primary")

    # Place status table before the main results
    resume_status = gr.HTML(
        label="Resume Processing Status",
        elem_id="resume-status-div",
        value="<div style='color: #666;'>Click 'Analyze'<br>to see processing status...</div>",
    )

    # Main output area
    output = gr.HTML(label="Analysis Results", value="<div style='color: #666;'></div>")

    def process_with_updates(job_description, input_type, resume_text, resume_files):
        """Analyze the job description and resume(s), streaming progress to the UI.

        This is a generator event handler: every ``yield`` is a pair
        ``(status_html, results)`` that is written to the ``resume_status`` and
        ``output`` HTML components respectively.

        Args:
            job_description: Job description text from the textbox.
            input_type: ``"Paste Text"`` to analyze ``resume_text``; any other value
                analyzes the uploaded ``resume_files``.
            resume_text: Pasted resume text (used only in paste mode).
            resume_files: File paths from the upload widget, or ``None`` when
                nothing has been uploaded (used only in upload mode).

        Yields:
            Intermediate status updates, then a final pair whose second element is
            the full report produced by ``create_html_output``.
        """

        def note(message):
            # Wrap a short progress message in the grey placeholder style used by the results pane.
            return gr.update(value=f"<div style='color: #666;'>{message}</div>")

        def new_status(message):
            # Fresh per-resume progress record, in the shape handed to update_resume_status_html.
            return {
                "progress": 0,
                "status": message,
                "sentences_processed": 0,
                "total_sentences": 0,
                "failed": False,
            }

        paste_mode = input_type == "Paste Text"
        if not paste_mode:
            # The upload widget yields None when empty; bail out before spending an
            # API call on the job description instead of crashing on len(None).
            resume_files = resume_files or []
            if not resume_files:
                yield (
                    "<h3>Resume Processing Status:</h3><p>No resumes to process.</p>",
                    note("No resume files were uploaded. Please upload at least one .txt file."),
                )
                return

        # Per-resume progress records keyed by display name. Deliberately NOT named
        # `resume_status`, which is the gr.HTML component in the enclosing scope.
        status_by_resume = {}

        # Initial status HTML
        status_html = "<h3>Resume Processing Status:</h3><p>Preparing to analyze resumes...</p>"
        yield status_html, note("Processing job description...")

        # Process job description first
        job_result = process_job_description(job_description)
        job_skills = [skill['text'] for skill in job_result['skills']]

        resume_results = []
        filenames = []

        if paste_mode:
            # Process single resume
            resume_name = "Pasted Resume"
            status_by_resume[resume_name] = new_status("Starting analysis...")
            yield update_resume_status_html(status_by_resume), note("Extracting resume roles...")

            try:
                # The status dict is passed along so the worker can report progress
                # (presumably by mutating the entry for `resume_name`).
                resume_result = process_resume(
                    resume_text,
                    job_skills,
                    resume_status=status_by_resume,
                    resume_name=resume_name,
                )
            except Exception as e:
                # UI boundary: surface the failure in the status table rather than
                # aborting the whole request.
                print(f"Error processing pasted resume: {str(e)}")
                status_by_resume[resume_name]["failed"] = True
                status_by_resume[resume_name]["status"] = f"Error: {str(e)}"
            else:
                resume_results.append(resume_result)
                filenames.append(resume_name)

            yield update_resume_status_html(status_by_resume), note("Generating final report...")
        else:
            # Process multiple resumes
            resume_count = len(resume_files)

            # Initialize status for each resume
            for file_path in resume_files:
                status_by_resume[os.path.basename(file_path)] = new_status("Queued")

            yield update_resume_status_html(status_by_resume), note(f"Processing {resume_count} resumes...")

            # Counts every resume that has finished, successfully or not, so the
            # progress message can actually reach N/N when some resumes fail.
            finished = 0

            # Create a thread pool to process resumes concurrently
            with concurrent.futures.ThreadPoolExecutor() as executor:
                future_to_resume = {}

                # Submit all resume processing tasks
                for file_path in resume_files:
                    resume_name = os.path.basename(file_path)
                    status_by_resume[resume_name]["status"] = "Starting analysis..."

                    try:
                        with open(file_path, 'r', encoding='utf-8') as f:
                            resume_content = f.read()
                    except Exception as e:
                        # An unreadable file is reported per-resume; the others still run.
                        print(f"Error reading {resume_name}: {str(e)}")
                        status_by_resume[resume_name]["status"] = f"Error: {str(e)}"
                        status_by_resume[resume_name]["failed"] = True
                        finished += 1
                        continue

                    future = executor.submit(
                        process_resume,
                        resume_content,
                        job_skills,
                        resume_status=status_by_resume,
                        resume_name=resume_name,
                    )
                    future_to_resume[future] = resume_name

                pending = set(future_to_resume)

                # Poll with a short timeout so the status table refreshes about once
                # a second even while no future has completed.
                while pending:
                    yield (
                        update_resume_status_html(status_by_resume),
                        note(f"Processed {finished}/{resume_count} resumes..."),
                    )

                    done, pending = concurrent.futures.wait(
                        pending,
                        timeout=1.0,
                        return_when=concurrent.futures.FIRST_COMPLETED,
                    )

                    for future in done:
                        resume_name = future_to_resume[future]
                        finished += 1
                        try:
                            result = future.result()
                        except Exception as e:
                            print(f"Error processing resume {resume_name}: {str(e)}")
                            status_by_resume[resume_name]["status"] = f"Error: {str(e)}"
                            status_by_resume[resume_name]["failed"] = True
                        else:
                            # Skip results whose status entry was flagged as failed
                            # while the worker was running.
                            if not status_by_resume[resume_name].get("failed", False):
                                resume_results.append(result)
                                filenames.append(resume_name)
                            print(f"Completed processing of {resume_name} ({finished}/{resume_count})")

            # Final status update
            yield (
                update_resume_status_html(status_by_resume),
                note(f"All {resume_count} resumes processed. Generating report..."),
            )

        # Generate final output
        html_output = create_html_output(job_result, resume_results, filenames)

        # Final yield with complete status and output
        yield update_resume_status_html(status_by_resume), html_output

    # Connect the submit button to the processing function with real-time updates
    submit_btn.click(
        fn=process_with_updates,
        inputs=[job_description, input_type, resume_text, resume_file],
        outputs=[resume_status, output],
    )

    # Connect wake button
    wake_btn.click(fn=wake_servers, inputs=None, outputs=wake_status)

    gr.Markdown("""<div style="height: 20px;"></div>""")

if __name__ == "__main__":
    demo.launch()