import os
import re
import subprocess
import sys
from io import StringIO

import docx
import pandas as pd
import PyPDF2
import spacy
import streamlit as st
import torch
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
from sklearn.metrics.pairwise import cosine_similarity
from transformers import (
    BertForSequenceClassification,
    BertTokenizer,
    RagRetriever,
    RagSequenceForGeneration,
    RagTokenizer,
)

# Streamlit UI Customization. Called before anything else touches Streamlit
# (including the cached loaders below, which may display a spinner), because
# the page configuration has to be the first Streamlit command of the script.
st.set_page_config(page_title="AI-Powered Candidate Matching", page_icon=":guardsman:", layout="wide")

SPACY_MODEL_NAME = "en_core_web_sm"
model_name = "bert-base-uncased"
RAG_CHECKPOINT = "facebook/rag-token-nq"

# Patterns used by extract_personal_info, compiled once at import time.
EMAIL_PATTERN = re.compile(r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+')
PHONE_PATTERN = re.compile(
    r'(\+?\d{1,3}[-.\s]?)?(\(?\d{1,4}?\)?[-.\s]?)?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}'
)
# A candidate with fewer digits is far more likely to be a year, a house
# number or a postal code than a phone number; 15 is the E.164 upper bound.
MIN_PHONE_DIGITS = 7
MAX_PHONE_DIGITS = 15


# Streamlit re-executes the whole script on every interaction, so the heavy
# models are loaded through cached factories instead of plain module code.
@st.cache_resource
def _load_spacy_pipeline():
    """Return the spaCy pipeline, downloading the model once if it is missing."""
    try:
        return spacy.load(SPACY_MODEL_NAME)
    except OSError:
        # spaCy raises OSError when the model package is not installed.
        subprocess.check_call([sys.executable, "-m", "spacy", "download", SPACY_MODEL_NAME])
        return spacy.load(SPACY_MODEL_NAME)


@st.cache_resource
def _load_bert():
    """Return the (tokenizer, model) pair used for candidate matching."""
    bert_tokenizer = BertTokenizer.from_pretrained(model_name)
    bert_model = BertForSequenceClassification.from_pretrained(model_name)
    bert_model.eval()  # inference only: make sure dropout is disabled
    return bert_tokenizer, bert_model


@st.cache_resource
def _load_rag():
    """Return the (tokenizer, retriever, model) triple used for recommendations."""
    tok = RagTokenizer.from_pretrained(RAG_CHECKPOINT)
    # NOTE(review): loaded with the checkpoint's default index settings, as
    # before; this presumably fetches the full retrieval index. Confirm
    # whether `use_dummy_dataset=True` is wanted for development.
    retriever = RagRetriever.from_pretrained(RAG_CHECKPOINT)
    # The retriever is attached to the model so that `generate` performs
    # retrieval itself.
    # NOTE(review): a "rag-token" checkpoint is loaded into the
    # sequence-level model class; confirm this pairing is intended.
    generator = RagSequenceForGeneration.from_pretrained(RAG_CHECKPOINT, retriever=retriever)
    return tok, retriever, generator


# Load the pre-trained model for candidate matching (BERT or similar)
nlp = _load_spacy_pipeline()
tokenizer, model = _load_bert()

# Initialize RAG model and retriever
rag_tokenizer, rag_retriever, rag_model = _load_rag()


# Function to extract text from PDF
def extract_text_from_pdf(file):
    """Return the concatenated text of every page of the PDF *file*.

    Pages for which the reader yields no text contribute an empty string
    instead of breaking the concatenation.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


# Function to extract text from Word document
def extract_text_from_docx(file):
    """Return the text of the Word document *file*, one paragraph per line."""
    doc = docx.Document(file)
    return "".join(f"{para.text}\n" for para in doc.paragraphs)


# Function to extract resume text based on file type
def extract_resume_text(uploaded_file):
    """Return the text of *uploaded_file*, or None for an unsupported type.

    The type is chosen from the file name's extension, compared
    case-insensitively so that e.g. ``CV.PDF`` is accepted.
    """
    filename = uploaded_file.name.lower()
    if filename.endswith(".pdf"):
        return extract_text_from_pdf(uploaded_file)
    if filename.endswith(".docx"):
        return extract_text_from_docx(uploaded_file)
    if filename.endswith(".txt"):
        # Undecodable bytes are replaced rather than aborting the upload.
        return uploaded_file.getvalue().decode("utf-8", errors="replace")
    return None


# Function to extract Name, Email, and Phone Number from resume text
def extract_personal_info(resume_text):
    """Return a ``(name, email, phone)`` tuple extracted from *resume_text*.

    Each element is a string, or None when nothing suitable was found.
    All three are heuristics: the name is the first PERSON entity reported by
    spaCy, the email is the first regex match, and the phone is the first
    digit group that contains a plausible number of digits.
    """
    # Use spaCy's NER to extract named entities (like names); the first
    # PERSON entity is assumed to be the candidate's name.
    doc = nlp(resume_text)
    name = next((ent.text for ent in doc.ents if ent.label_ == "PERSON"), None)

    email_match = EMAIL_PATTERN.search(resume_text)
    # The pattern allows dots in the domain, so a sentence-ending period can
    # be captured along with the address; strip it.
    email = email_match.group().rstrip(".") if email_match else None

    phone = None
    for candidate in PHONE_PATTERN.finditer(resume_text):
        digit_count = sum(ch.isdigit() for ch in candidate.group())
        if MIN_PHONE_DIGITS <= digit_count <= MAX_PHONE_DIGITS:
            phone = candidate.group()
            break

    return name, email, phone


# Function to process job description and resume using BERT
def match_resume_to_job(job_description, resume_text):
    """Return the cosine similarity between the two texts as a (1, 1) array.

    Both texts are encoded with BERT (truncated to the tokenizer's maximum
    length) and represented by the attention-masked mean of the final hidden
    layer. The classification logits are deliberately not used: the
    classification head of this checkpoint is not trained, so its outputs
    carry no information about the texts.

    NOTE(review): mean-pooled BERT similarities are not calibrated scores;
    confirm that the thresholds applied by the caller still make sense.
    """
    inputs = tokenizer(
        [job_description, resume_text],
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    with torch.no_grad():
        outputs = model(**inputs, output_hidden_states=True)

    token_embeddings = outputs.hidden_states[-1]
    # Padding positions must not contribute to the average.
    mask = inputs["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)
    pooled = (token_embeddings * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
    embeddings = pooled.cpu().numpy()
    return cosine_similarity(embeddings[0:1], embeddings[1:2])


# Function to generate job recommendations using RAG
def generate_job_recommendations(job_desc):
    """Return text generated by the RAG model for the job description.

    Retrieval is performed by the retriever attached to ``rag_model``, so
    only the tokenized description is passed to ``generate``.
    """
    inputs = rag_tokenizer(job_desc, return_tensors="pt", truncation=True)
    output = rag_model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
    )
    return rag_tokenizer.decode(output[0], skip_special_tokens=True)


# Add background image and styling using CSS
# NOTE(review): the CSS and title markup appear to be missing from the two
# calls below (the strings are effectively empty); confirm against the
# intended design.
st.markdown(""" """, unsafe_allow_html=True)

# Title with custom styling
st.markdown('', unsafe_allow_html=True)

# Sidebar for Resume Upload and Job Description Input
st.sidebar.header("Upload Resume and Job Description")
resume_file = st.sidebar.file_uploader("Upload Resume (PDF, DOCX, TXT)", type=["pdf", "docx", "txt"])
job_desc = st.sidebar.text_area("Enter Job Description")

# List of common skills for job matching (this can be expanded for your specific use case)
common_skills = [
    "Python", "Java", "C++", "SQL", "JavaScript", "HTML", "CSS", "AWS",
    "Docker", "Kubernetes", "Machine Learning", "Deep Learning",
    "Data Science", "TensorFlow", "PyTorch", "Tableau", "Power BI",
]


def _mentions_skill(skill, text):
    """Return True if *skill* occurs in *text* as a whole term, ignoring case.

    The surrounding characters must not be word characters, so "Java" does
    not match inside "JavaScript". Words of a multi-word skill may be
    separated by any run of whitespace.
    """
    body = r"\s+".join(re.escape(part) for part in skill.split())
    pattern = r"(?<!\w)" + body + r"(?!\w)"
    return re.search(pattern, text, flags=re.IGNORECASE) is not None


def _find_matching_skills(job_text, candidate_text, skills):
    """Return the entries of *skills* mentioned in both texts, in list order."""
    return [
        skill
        for skill in skills
        if _mentions_skill(skill, job_text) and _mentions_skill(skill, candidate_text)
    ]


# Match Score and Visualization
# A job description made only of whitespace is treated as missing.
if resume_file and job_desc and job_desc.strip():
    resume_text = extract_resume_text(resume_file)

    if resume_text:
        # Extract personal details
        name, email, phone = extract_personal_info(resume_text)

        # Display the extracted information
        st.markdown("\n\nPersonal Information:\n\n", unsafe_allow_html=True)
        for label, value in (("Name", name), ("Email", email), ("Phone", phone)):
            st.write(f"{label}: {value}" if value else f"{label}: Not found")

        st.markdown("\n\nResume Text Extracted:\n\n", unsafe_allow_html=True)
        st.text_area("Resume Content", resume_text, height=300)

        # Match Resume with Job Description
        st.write("### Matching with Job Description...")
        match_score = match_resume_to_job(job_desc, resume_text)
        # Cosine similarity lies in [-1, 1]; clamp so the displayed
        # percentage is never negative or above 100.
        match_percentage = min(100.0, max(0.0, float(match_score[0][0]) * 100))

        # Show Match Percentage and Feedback
        st.markdown(
            f"\n\nMatch Percentage: {match_percentage:.2f}%\n\n",
            unsafe_allow_html=True,
        )
        if match_percentage > 80:
            st.success("This candidate is an excellent match for the job!")
        elif match_percentage > 50:
            st.warning("This candidate might be a good fit for the job, but further review is recommended.")
        else:
            st.error("The match is not strong. This candidate may not be a good fit for the job.")

        # Display Key Matching Skills
        st.markdown("\n\nKey Matching Skills:\n\n", unsafe_allow_html=True)
        # Skills are searched for in the raw texts rather than in
        # whitespace-split tokens, so acronyms ("SQL"), mixed-case names
        # ("JavaScript"), multi-word skills ("Machine Learning") and skills
        # followed by punctuation are all recognised.
        matching_skills = _find_matching_skills(job_desc, resume_text, common_skills)
        if matching_skills:
            st.write(f"Matching Skills: {', '.join(matching_skills)}")
        else:
            st.write("No matching skills found based on the job description. Please review the resume or job description.")

        # Generate Job Recommendations
        st.write("### Job Recommendations...")
        recommendations = generate_job_recommendations(job_desc)
        st.write(recommendations)
    else:
        st.error("Could not extract text from the uploaded resume. Please upload a valid resume.")
else:
    st.info("Upload a resume and enter a job description to get started.")

# Footer
st.markdown('', unsafe_allow_html=True)