# Spaces: Sleeping (page-capture residue from the hosting site; not part of the program)
import datetime
import io
import os
import re

import google.generativeai as genai
import pandas as pd
import PyPDF2 as pdf
import streamlit as st
import torch
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
# Configure the Google Generative AI client. The key is read from the
# GOOGLE_API_KEY environment variable so it is never hard-coded here; the
# script stops immediately when the variable is missing or empty.
api_key = os.environ.get('GOOGLE_API_KEY')
if not api_key:
    raise ValueError("API key not found. Please set GOOGLE_API_KEY in your Hugging Face Space secrets.")
genai.configure(api_key=api_key)
# Load pre-trained models.
#
# Streamlit re-executes this script from the top on every widget
# interaction, so the loaders are wrapped in st.cache_resource: each model
# is built once per process and reused on later reruns instead of being
# reloaded every time.
@st.cache_resource
def _load_ner_pipeline(model_id):
    """Build a "ner" pipeline for ``model_id`` using the "simple" aggregation strategy."""
    return pipeline("ner", model=model_id, aggregation_strategy="simple")


@st.cache_resource
def _load_sequence_classifier(model_id):
    """Return the ``(tokenizer, model)`` pair loaded from ``model_id``."""
    return (
        AutoTokenizer.from_pretrained(model_id),
        AutoModelForSequenceClassification.from_pretrained(model_id),
    )


skill_extractor = _load_ner_pipeline("dslim/bert-base-NER")
education_extractor = _load_ner_pipeline("dbmdz/bert-large-cased-finetuned-conll03-english")

# Define the task and model for Hugging Face
task = "sentiment-analysis"
model_name = "roberta-base"  # Using RoBERTa
# NOTE(review): "roberta-base" looks like a base checkpoint rather than one
# fine-tuned for classification -- confirm that the classification head
# loaded here produces meaningful predictions for analyze_resume().
tokenizer, model = _load_sequence_classifier(model_name)
def input_pdf_text(uploaded_file):
    """Return the text of every page of an uploaded PDF, concatenated in page order.

    Args:
        uploaded_file: Object whose ``read()`` returns the raw PDF bytes.

    Returns:
        The extracted text of all pages joined together with no separator.
    """
    pdf_bytes = uploaded_file.read()
    reader = pdf.PdfReader(io.BytesIO(pdf_bytes))
    return "".join(page.extract_text() for page in reader.pages)
def extract_name_from_model_response(response_text):
    """Pull the candidate name out of the model's free-text response.

    Looks for the first ``Candidate Name:`` label and returns the rest of
    that line.

    Args:
        response_text: Text returned by the generative model.

    Returns:
        The candidate name with surrounding whitespace and markdown emphasis
        characters removed, or ``"Not Available"`` when the label is missing
        or nothing follows it.
    """
    match = re.search(r"Candidate Name:\s*(.*)", response_text)
    if match:
        # The label is often rendered as "**Candidate Name:** Jane Roe", which
        # leaves "** Jane Roe" in the capture; trim emphasis markers and
        # whitespace from both ends so only the name remains.
        name = match.group(1).strip(" \t\r\n*_`")
        if name:
            return name
    return "Not Available"
def extract_contact_info(resume_text):
    """Find the first email address and the first phone number in the resume.

    Args:
        resume_text: Plain text of the resume.

    Returns:
        A ``(email, contact)`` tuple; each element is the matched text or
        ``"Not Available"`` when the corresponding pattern does not match.
    """
    email_pattern = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    phone_pattern = r"\+?\(?\d{1,3}\)?[-.\s]?\(?\d{1,4}\)?[-.\s]?\d{3}[-.\s]?\d{4}|\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}"

    found = []
    for pattern in (email_pattern, phone_pattern):
        hit = re.search(pattern, resume_text)
        found.append("Not Available" if hit is None else hit.group(0))
    return tuple(found)
def extract_skills(resume_text):
    """List the entities the skill NER pipeline tags as ``SKILL``.

    Args:
        resume_text: Plain text of the resume.

    Returns:
        The matching entity words joined with ``", "``, or
        ``"Not Available"`` when no entity carries the ``SKILL`` group.
    """
    # NOTE(review): confirm that the model behind skill_extractor ever emits
    # a 'SKILL' entity group; if it does not, this always returns
    # "Not Available".
    skills = []
    for entity in skill_extractor(resume_text):
        if entity['entity_group'] == 'SKILL':
            skills.append(entity['word'])
    if not skills:
        return "Not Available"
    return ", ".join(skills)
def extract_education(resume_text):
    """Extract education details, preferring NER output over regex matches.

    Args:
        resume_text: Plain text of the resume.

    Returns:
        Entity words tagged ``EDUCATION`` by the NER pipeline joined with
        ``", "``; if there are none, every regex match for degree and
        university phrases joined the same way; ``"Not Available"`` when
        neither source finds anything.
    """
    ner_results = education_extractor(resume_text)
    education_entities = [
        entity['word'] for entity in ner_results if entity['entity_group'] == 'EDUCATION'
    ]
    if education_entities:
        return ", ".join(education_entities)

    # Fallback: keyword patterns. The dots in the abbreviations are escaped
    # so that "B.A" / "M.A" / "B.Tech" / "M.Tech" match only a literal dot
    # rather than any character (an unescaped "M.A" also matches "MBA").
    edu_patterns = [
        r"(Bachelor of .+|Master of .+|PhD|BSc|MSc|MBA|B\.A|M\.A|B\.Tech|M\.Tech|Doctorate|Engineering|Computer Science|Information Technology|Data Science)",
        r"(University of [A-Za-z]+.*)",
    ]
    education = []
    for pattern in edu_patterns:
        education.extend(re.findall(pattern, resume_text))
    return ", ".join(education) if education else "Not Available"
def extract_experience_years(text):
    """Estimate a total number of years of experience mentioned in ``text``.

    Adds up three kinds of mentions (matching is case-insensitive):

    * closed year ranges such as ``2015-2020``, ``2015 – 2020`` or
      ``2015 to 2020`` contribute ``end - start``;
    * open-ended spans such as ``2019–present`` or ``since 2019``
      contribute ``current year - start``;
    * explicit durations such as ``5 years`` or ``10+ years`` contribute
      the stated number.

    Only four-digit years beginning with 19 or 20 are treated as years, so
    other hyphenated numbers (phone numbers, for instance) are not counted.
    Ranges that would yield a negative span contribute nothing.

    Args:
        text: Plain text to scan.

    Returns:
        The summed number of years as an ``int``; ``0`` for empty input.
    """
    if not text:
        return 0

    current_year = datetime.date.today().year
    year = r"((?:19|20)\d{2})"
    # Hyphen, en dash, em dash or the word "to", with optional whitespace.
    separator = r"\s*(?:-|\u2013|\u2014|to)\s*"
    total = 0

    # Closed ranges: each range is counted exactly once.
    closed_ranges = re.findall(rf"\b{year}{separator}{year}\b", text, flags=re.IGNORECASE)
    for start, end in closed_ranges:
        total += max(int(end) - int(start), 0)

    # Open-ended spans are measured up to the current year.
    open_starts = re.findall(
        rf"\b{year}{separator}(?:present|current|now)\b", text, flags=re.IGNORECASE
    )
    open_starts += re.findall(rf"\bsince\s+{year}\b", text, flags=re.IGNORECASE)
    for start in open_starts:
        total += max(current_year - int(start), 0)

    # Explicit durations ("5 years", "10+ years", "1 year").
    for count in re.findall(r"\b(\d+)\+?\s*years?\b", text, flags=re.IGNORECASE):
        total += int(count)

    return total
def calculate_match_percentage(resume_text, job_description):
    """Score how closely the resume matches the job description.

    Both texts are vectorised together with TF-IDF (English stop words
    removed) and compared with cosine similarity.

    Args:
        resume_text: Plain text of the resume.
        job_description: Plain text of the job description.

    Returns:
        The similarity scaled to a percentage and rounded to two decimals.
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    vectors = vectorizer.fit_transform([resume_text, job_description])
    # Row 0 is the resume, row 1 the job description.
    similarity = cosine_similarity(vectors[0:1], vectors[1:2])[0][0]
    return round(similarity * 100, 2)
def get_gemini_response(input_text, job_description):
    """Ask the Gemini model for an ATS-style analysis of a resume.

    Args:
        input_text: Resume text inserted into the prompt as candidate details.
        job_description: Job description inserted into the prompt.

    Returns:
        The model's response text with leading and trailing whitespace removed.
    """
    prompt = f"""
Act as an Applicant Tracking System. Analyze the resume with respect to the job description.
Candidate Details: {input_text}
Job Description: {job_description}
Please extract the following:
1. Candidate Name
2. Relevant Skills
3. Educational Background
4. Direct Team Leadership Experience (in years)
5. Direct Management Experience (in years)
6. Match percentage with the job description
7. Provide a resume summary in 5 bullet points highlighting the candidate's qualifications.
"""
    gemini = genai.GenerativeModel('gemini-1.5-flash')
    reply = gemini.generate_content(prompt)
    return reply.text.strip()
def extract_leadership_summary(response_text):
    """Build a numbered summary of leadership-related lines from the response.

    Keeps the non-blank lines that mention "leadership", "management" or
    "team" (case-insensitive), retains at most the last five of them, and
    numbers them from 1 under a fixed header line.

    Args:
        response_text: Text returned by the generative model.

    Returns:
        The header followed by one ``"<n>. <line>"`` entry per kept line,
        each terminated by a newline.
    """
    keywords = ("leadership", "management", "team")
    relevant = []
    for raw_line in response_text.strip().split("\n"):
        line = raw_line.strip()
        if line and any(word in line.lower() for word in keywords):
            relevant.append(line)

    header = "Resume Summary: Leadership and Team Management Experience (in years)\n"
    # Slicing already yields the whole list when it has fewer than five items.
    numbered = [f"{idx}. {bullet}\n" for idx, bullet in enumerate(relevant[-5:], 1)]
    return header + "".join(numbered)
def analyze_resume(resume_text):
    """Classify the resume against four fixed statements with the RoBERTa model.

    Each statement is prefixed to the resume text, tokenised with
    truncation, and run through the module-level sequence classifier. A
    criterion counts as met when the predicted class index is 1 (assumed
    by the original author to be the positive class).

    Args:
        resume_text: Plain text of the resume.

    Returns:
        A tuple ``(analysis, is_suitable)`` where ``analysis`` maps
        ``managerial_responsibilities``, ``leadership_skills``,
        ``work_experience`` and ``relevant_experience`` to booleans, and
        ``is_suitable`` is ``True`` only when all four are ``True``.
    """
    # Create input prompts for different aspects
    prompts = [
        f"This resume shows strong managerial responsibilities: {resume_text}",
        f"This resume demonstrates excellent leadership skills: {resume_text}",
        f"This resume indicates significant work experience: {resume_text}",
        f"This resume indicates at least 2 years of relevant experience: {resume_text}",
    ]

    results = []
    # Inference only: disabling autograd avoids building a gradient graph for
    # every forward pass and does not change the predicted classes.
    with torch.no_grad():
        for prompt in prompts:
            # Tokenize the prompt with truncation
            inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
            outputs = model(**inputs)
            results.append(torch.argmax(outputs.logits).item())

    # Interpret the results
    analysis = {
        "managerial_responsibilities": results[0] == 1,  # Assuming 1 is positive sentiment
        "leadership_skills": results[1] == 1,
        "work_experience": results[2] == 1,
        "relevant_experience": results[3] == 1,
    }

    # Check if all criteria are met
    is_suitable = all(analysis.values())
    return analysis, is_suitable
# Streamlit interface: collect a resume PDF and a job description, then run
# the full analysis when the user presses "Analyze".
st.title("Resume ATS Analysis Tool")
st.markdown("### Upload Resume and Job Description for Analysis")

# Inputs: resume PDF upload and free-text job description
uploaded_file = st.file_uploader("Upload Resume PDF", type=["pdf"])
job_description = st.text_area("Job Description", height=200)

if uploaded_file and job_description:
    # The button is only rendered once both inputs are present.
    analyze_button = st.button("Analyze")
    if analyze_button:
        resume_text = input_pdf_text(uploaded_file)
        response_text = get_gemini_response(resume_text, job_description)

        # Each extractor already returns "Not Available" when it finds
        # nothing, so its result is stored as-is.
        name = extract_name_from_model_response(response_text)
        email, contact = extract_contact_info(resume_text)
        skills = extract_skills(resume_text)
        education = extract_education(resume_text)

        # Both experience figures come from the same extractor on the same text.
        team_leadership_years = extract_experience_years(resume_text)
        management_experience_years = extract_experience_years(resume_text)

        # Match percentage and the High / Medium / Low band derived from it
        match_percentage = calculate_match_percentage(resume_text, job_description)
        job_description_match_score = (
            "High" if match_percentage >= 80
            else "Medium" if match_percentage >= 60
            else "Low"
        )

        leadership_summary = extract_leadership_summary(response_text)

        # Classifier-based checks
        analysis, is_suitable = analyze_resume(resume_text)

        # Key order determines the column order of the results table.
        data = {
            'Candidate_Name': name,
            'Email': email,
            'Contact': contact,
            'Skills': skills,
            'Education': education,
            'Team_Leadership_Experience (Years)': team_leadership_years,
            'Management_Experience (Years)': management_experience_years,
            'Match_Percentage': match_percentage,
            'Job_Description_Match_Score': job_description_match_score,
            'Leadership_and_Team_Management_Summary': leadership_summary,
            'Managerial_Responsibilities': analysis['managerial_responsibilities'],
            'Leadership_Skills': analysis['leadership_skills'],
            'Work_Experience': analysis['work_experience'],
            'Relevant_Experience': analysis['relevant_experience'],
            'Suitable_for_Role': is_suitable,
        }

        # Show the single-row results table and offer it as a CSV download
        df = pd.DataFrame([data])
        st.write(df)
        csv = df.to_csv(index=False)
        st.download_button(
            label="Download Results as CSV",
            data=csv,
            file_name='resume_analysis_results.csv',
            mime='text/csv',
        )
else:
    st.write("Please upload a resume and provide a job description to analyze.")