"""Streamlit app that analyses a PDF resume against a job description.

The script extracts text from an uploaded PDF, asks a Gemini model for a
free-text assessment, derives a handful of structured fields locally (contact
details, education, experience years, TF-IDF match score, classifier flags)
and shows the result as a one-row table that can be downloaded as CSV.
"""
import datetime
import io
import os
import re
from typing import Dict, Optional, Tuple

import google.generativeai as genai
import pandas as pd
import PyPDF2 as pdf
import streamlit as st
import torch
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# Placeholder used for every field that could not be extracted.
NOT_AVAILABLE = "Not Available"

# --- Configuration ---------------------------------------------------------

# The Google API key must be provided through the environment.
api_key = os.getenv('GOOGLE_API_KEY')
if not api_key:
    raise ValueError("API key not found. Please set GOOGLE_API_KEY in your Hugging Face Space secrets.")

genai.configure(api_key=api_key)

# NOTE(review): these loads run at module level; Streamlit presumably
# re-executes the script on every interaction, so consider caching them
# (e.g. with st.cache_resource on a recent Streamlit) -- confirm.
skill_extractor = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")
education_extractor = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    aggregation_strategy="simple",
)

# Task label and checkpoint for the sequence classifier used by analyze_resume.
task = "sentiment-analysis"
model_name = "roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# --- Pre-compiled patterns -------------------------------------------------

_NAME_RE = re.compile(r"Candidate Name[\s*]*:[\s*]*(.+)", re.IGNORECASE)
_EMAIL_RE = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
_PHONE_RE = re.compile(
    r"\+?\(?\d{1,3}\)?[-.\s]?\(?\d{1,4}\)?[-.\s]?\d{3}[-.\s]?\d{4}"
    r"|\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}"
)
# Dots in the degree abbreviations are escaped so that "B.A" does not match
# arbitrary three-character sequences such as "BOA".
_EDUCATION_PATTERNS = (
    re.compile(
        r"(Bachelor of .+|Master of .+|PhD|BSc|MSc|MBA|B\.A|M\.A|B\.Tech|M\.Tech"
        r"|Doctorate|Engineering|Computer Science|Information Technology|Data Science)"
    ),
    re.compile(r"(University of [A-Za-z]+.*)"),
)
# Only plausible calendar years are accepted, which keeps phone numbers and
# other digit runs from being read as date ranges.
_YEAR = r"(?:19|20)\d{2}"
_YEAR_RANGE_RE = re.compile(
    rf"\b({_YEAR})\s*(?:[-–—]|to)\s*({_YEAR}|present|current|now)\b",
    re.IGNORECASE,
)
_SINCE_YEAR_RE = re.compile(rf"\bsince\s+({_YEAR})\b", re.IGNORECASE)
_STATED_YEARS_RE = re.compile(r"\b(\d{1,2})\+?\s*years?\b", re.IGNORECASE)


# --- Extraction helpers ----------------------------------------------------

def input_pdf_text(uploaded_file) -> str:
    """Return the concatenated text of every page of an uploaded PDF.

    Args:
        uploaded_file: A binary file-like object exposing ``read()``.

    Returns:
        The extracted text; pages that yield no text contribute an empty
        string instead of breaking the concatenation.
    """
    reader = pdf.PdfReader(io.BytesIO(uploaded_file.read()))
    return "".join(page.extract_text() or "" for page in reader.pages)


def extract_name_from_model_response(response_text: str) -> str:
    """Pull the candidate name out of the model's free-text answer.

    The label may be wrapped in Markdown emphasis (``**Candidate Name:**``),
    so asterisks around the colon and the value are ignored.

    Returns:
        The name, or ``"Not Available"`` when no non-empty name is found.
    """
    match = _NAME_RE.search(response_text or "")
    if not match:
        return NOT_AVAILABLE
    name = match.group(1).strip(" \t\r*_")
    return name or NOT_AVAILABLE


def extract_contact_info(resume_text: str) -> Tuple[str, str]:
    """Return ``(email, phone)`` found in the resume via regular expressions.

    Each element is ``"Not Available"`` when nothing matches.
    """
    email_match = _EMAIL_RE.search(resume_text)
    contact_match = _PHONE_RE.search(resume_text)
    email = email_match.group(0) if email_match else NOT_AVAILABLE
    contact = contact_match.group(0) if contact_match else NOT_AVAILABLE
    return email, contact


def extract_skills(resume_text: str) -> str:
    """Return a comma-separated list of entities tagged ``SKILL``.

    NOTE(review): the configured checkpoint appears to be a CoNLL-03 style
    NER model (presumably PER/ORG/LOC/MISC labels only); if so this filter
    never matches and the result is always "Not Available" -- confirm and
    swap in a skill-aware model.
    """
    if not resume_text or not resume_text.strip():
        return NOT_AVAILABLE
    entities = skill_extractor(resume_text)
    skills = [entity['word'] for entity in entities if entity['entity_group'] == 'SKILL']
    return ", ".join(skills) if skills else NOT_AVAILABLE


def extract_education(resume_text: str) -> str:
    """Return education mentions from the resume as a comma-separated string.

    Entities tagged ``EDUCATION`` by the NER model are preferred; otherwise
    the text is scanned with degree/university patterns. Duplicates are
    dropped while preserving first-seen order.

    NOTE(review): the configured checkpoint appears to be a CoNLL-03 style
    NER model without an EDUCATION label, so the regex fallback is presumably
    the only path that ever produces output -- confirm.
    """
    if not resume_text or not resume_text.strip():
        return NOT_AVAILABLE

    entities = education_extractor(resume_text)
    tagged = [entity['word'] for entity in entities if entity['entity_group'] == 'EDUCATION']
    if tagged:
        return ", ".join(tagged)

    found = []
    for pattern in _EDUCATION_PATTERNS:
        found.extend(hit.strip() for hit in pattern.findall(resume_text))
    unique = list(dict.fromkeys(hit for hit in found if hit))
    return ", ".join(unique) if unique else NOT_AVAILABLE


def extract_experience_years(text: str, current_year: Optional[int] = None) -> int:
    """Estimate the total number of experience years mentioned in ``text``.

    Three kinds of mentions are summed:

    * year ranges such as ``2015-2020``, ``2015 to 2020`` or ``2019–present``;
    * ``since 2018`` (counted up to ``current_year``);
    * explicit durations such as ``5 years`` or ``10+ years``.

    Args:
        text: Resume text to scan.
        current_year: Year used to resolve "present"/"since"; defaults to
            today's year.

    Returns:
        The summed number of years (never negative). Ranges are not
        de-duplicated, so overlapping periods are counted more than once.
    """
    if not text:
        return 0
    if current_year is None:
        current_year = datetime.date.today().year

    total = 0
    for start, end in _YEAR_RANGE_RE.findall(text):
        end_year = int(end) if end.isdigit() else current_year
        # Reversed ranges are ignored rather than subtracted.
        total += max(end_year - int(start), 0)
    total += sum(max(current_year - int(year), 0) for year in _SINCE_YEAR_RE.findall(text))
    total += sum(int(count) for count in _STATED_YEARS_RE.findall(text))
    return total


def calculate_match_percentage(resume_text: str, job_description: str) -> float:
    """Return the TF-IDF cosine similarity of the two texts as a percentage.

    Returns:
        A value in ``[0, 100]`` rounded to two decimals; ``0.0`` when either
        text is blank or contains no usable vocabulary.
    """
    if not (resume_text and resume_text.strip() and job_description and job_description.strip()):
        return 0.0
    try:
        tfidf_matrix = TfidfVectorizer(stop_words='english').fit_transform(
            [resume_text, job_description]
        )
    except ValueError:
        # Raised when the documents yield an empty vocabulary
        # (e.g. they contain only stop words).
        return 0.0
    similarity = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
    return round(float(similarity) * 100, 2)


def get_gemini_response(input_text: str, job_description: str) -> str:
    """Ask the Gemini model for an ATS-style assessment of the resume.

    Args:
        input_text: Resume text.
        job_description: Job description to compare against.

    Returns:
        The model's answer with surrounding whitespace removed.
    """
    prompt = (
        "\n"
        "Act as an Applicant Tracking System. Analyze the resume with respect to the job description.\n"
        f"Candidate Details: {input_text}\n"
        f"Job Description: {job_description}\n"
        "Please extract the following:\n"
        "1. Candidate Name\n"
        "2. Relevant Skills\n"
        "3. Educational Background\n"
        "4. Direct Team Leadership Experience (in years)\n"
        "5. Direct Management Experience (in years)\n"
        "6. Match percentage with the job description\n"
        "7. Provide a resume summary in 5 bullet points highlighting the candidate's qualifications.\n"
    )
    # Named distinctly so it does not shadow the module-level classifier `model`.
    gemini_model = genai.GenerativeModel('gemini-1.5-flash')
    response = gemini_model.generate_content(prompt)
    return response.text.strip()


def extract_leadership_summary(response_text: str) -> str:
    """Build a numbered summary from lines mentioning leadership topics.

    Non-empty lines containing "leadership", "management" or "team"
    (case-insensitive) are collected and the last five are kept.

    Returns:
        A header line followed by one numbered line per kept entry.
    """
    keywords = ("leadership", "management", "team")
    stripped_lines = (line.strip() for line in response_text.strip().split("\n"))
    relevant = [
        line for line in stripped_lines
        if line and any(keyword in line.lower() for keyword in keywords)
    ]
    header = "Resume Summary: Leadership and Team Management Experience (in years)\n"
    bullets = "".join(f"{idx}. {bullet}\n" for idx, bullet in enumerate(relevant[-5:], 1))
    return header + bullets


def analyze_resume(resume_text: str) -> Tuple[Dict[str, bool], bool]:
    """Classify four statements about the resume with the RoBERTa model.

    Each statement is prefixed to the resume text, truncated to the model's
    maximum length, and counted as satisfied when the predicted class is 1.

    NOTE(review): `roberta-base` is a base checkpoint rather than a
    fine-tuned classifier, so its classification head is presumably untrained
    and these flags may be arbitrary -- confirm and replace with a fine-tuned
    model.

    Returns:
        ``(analysis, is_suitable)`` where ``analysis`` maps each criterion to
        a bool and ``is_suitable`` is True only if all criteria are met.
    """
    criteria = {
        "managerial_responsibilities": "This resume shows strong managerial responsibilities",
        "leadership_skills": "This resume demonstrates excellent leadership skills",
        "work_experience": "This resume indicates significant work experience",
        "relevant_experience": "This resume indicates at least 2 years of relevant experience",
    }

    analysis = {}
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        for criterion, statement in criteria.items():
            inputs = tokenizer(f"{statement}: {resume_text}", return_tensors="pt", truncation=True)
            logits = model(**inputs).logits
            predicted_class = int(torch.argmax(logits, dim=-1).item())
            analysis[criterion] = predicted_class == 1  # assuming 1 is the positive class

    return analysis, all(analysis.values())


# --- Report assembly and UI ------------------------------------------------

def _match_score_label(match_percentage: float) -> str:
    """Map a match percentage to the "High"/"Medium"/"Low" bucket."""
    if match_percentage >= 80:
        return "High"
    if match_percentage >= 60:
        return "Medium"
    return "Low"


def _build_report(resume_text: str, job_description: str, response_text: str) -> dict:
    """Assemble the single-row report; key order defines the column order."""
    email, contact = extract_contact_info(resume_text)
    # NOTE(review): leadership and management years come from the same
    # whole-resume estimate, so both columns always carry the same value.
    experience_years = extract_experience_years(resume_text)
    match_percentage = calculate_match_percentage(resume_text, job_description)
    analysis, is_suitable = analyze_resume(resume_text)

    return {
        'Candidate_Name': extract_name_from_model_response(response_text),
        'Email': email,
        'Contact': contact,
        'Skills': extract_skills(resume_text),
        'Education': extract_education(resume_text),
        'Team_Leadership_Experience (Years)': experience_years,
        'Management_Experience (Years)': experience_years,
        'Match_Percentage': match_percentage,
        'Job_Description_Match_Score': _match_score_label(match_percentage),
        'Leadership_and_Team_Management_Summary': extract_leadership_summary(response_text),
        'Managerial_Responsibilities': analysis['managerial_responsibilities'],
        'Leadership_Skills': analysis['leadership_skills'],
        'Work_Experience': analysis['work_experience'],
        'Relevant_Experience': analysis['relevant_experience'],
        'Suitable_for_Role': is_suitable,
    }


def main() -> None:
    """Render the upload form and, on request, the analysis table."""
    st.title("Resume ATS Analysis Tool")
    st.markdown("### Upload Resume and Job Description for Analysis")

    uploaded_file = st.file_uploader("Upload Resume PDF", type=["pdf"])
    job_description = st.text_area("Job Description", height=200)

    if not (uploaded_file and job_description):
        st.write("Please upload a resume and provide a job description to analyze.")
        return

    if not st.button("Analyze"):
        return

    resume_text = input_pdf_text(uploaded_file)
    if not resume_text.strip():
        # Image-only PDFs yield no text; nothing meaningful can be analysed.
        st.error("No text could be extracted from the uploaded PDF.")
        return

    response_text = get_gemini_response(resume_text, job_description)
    df = pd.DataFrame([_build_report(resume_text, job_description, response_text)])

    st.write(df)
    st.download_button(
        label="Download Results as CSV",
        data=df.to_csv(index=False),
        file_name='resume_analysis_results.csv',
        mime='text/csv',
    )


# Run unconditionally so the page is rendered whenever the script is executed,
# exactly as the original top-level statements did.
main()