import os
import re
import tempfile

import fitz  # PyMuPDF, used for PDF text extraction
import gradio as gr
import nltk
import pandas as pd
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Download NLTK data files
nltk.download('punkt')
nltk.download('stopwords')

# Minimum number of keywords a resume should share with the job description
# before the "could better match" hint is suppressed.
MIN_COMMON_KEYWORDS = 8

# Verbs whose presence suggests achievement-oriented phrasing.
ACTION_VERBS = ("managed", "led", "developed", "designed", "implemented", "created")

# Word boundaries matter: a plain substring test would let "led" match
# "scheduled" or "handled", so the check would almost never fire.
_ACTION_VERB_RE = re.compile(r'\b(?:' + '|'.join(ACTION_VERBS) + r')\b', re.IGNORECASE)
_SECTION_RE = re.compile(r'\bexperience\b|\beducation\b|\bskills\b', re.IGNORECASE)

# Where the Excel report is written. '/content' is used when it exists;
# otherwise the system temporary directory is used.
COLAB_OUTPUT_DIR = '/content'
OUTPUT_FILENAME = 'Resume_Analysis.xlsx'


def _is_empty_vocabulary_error(exc):
    """Return True if *exc* is scikit-learn's "empty vocabulary" ValueError."""
    return 'empty vocabulary' in str(exc)


def _resolve_path(uploaded_file):
    """Return the filesystem path of an uploaded file.

    Accepts a plain path (``str`` / ``os.PathLike``) as well as a file-like
    object that exposes the path through a ``name`` attribute.
    """
    if isinstance(uploaded_file, (str, os.PathLike)):
        return os.fspath(uploaded_file)
    return getattr(uploaded_file, 'name', uploaded_file)


def _default_output_path():
    """Return the path the Excel report should be written to."""
    if os.path.isdir(COLAB_OUTPUT_DIR):
        base_dir = COLAB_OUTPUT_DIR
    else:
        base_dir = tempfile.gettempdir()
    return os.path.join(base_dir, OUTPUT_FILENAME)


def preprocess_text(text):
    """Lower-case *text* and collapse every run of non-word characters to one space."""
    return re.sub(r'\W+', ' ', text.lower())


def extract_keywords_tfidf(text, max_features=50):
    """Return up to *max_features* keywords of *text*, highest TF-IDF score first.

    English stop words are ignored. An empty list is returned when *text*
    contains no usable terms (empty string or stop words only).
    """
    vectorizer = TfidfVectorizer(stop_words='english', max_features=max_features)
    try:
        tfidf_matrix = vectorizer.fit_transform([text])
    except ValueError as exc:
        # Only swallow the "no terms at all" case; anything else is a real error.
        if not _is_empty_vocabulary_error(exc):
            raise
        return []
    feature_names = vectorizer.get_feature_names_out()
    tfidf_scores = tfidf_matrix.toarray().flatten()
    ranked = sorted(zip(tfidf_scores, feature_names), reverse=True)
    return [keyword for _, keyword in ranked]


def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*."""
    # The context manager closes the document even if extraction fails.
    with fitz.open(pdf_path) as document:
        return "".join(
            document.load_page(page_num).get_text()
            for page_num in range(len(document))
        )


def give_feedback(resume_text, job_description, jd_keywords=None):
    """Return a list of improvement hints for *resume_text*.

    Args:
        resume_text: Raw text of the resume.
        job_description: Raw text of the job description.
        jd_keywords: Optional precomputed keywords of the job description
            (as returned by ``extract_keywords_tfidf``). Computed from
            *job_description* when omitted.

    Returns:
        A non-empty list of feedback strings; a single positive message when
        none of the checks fires.
    """
    feedback = []

    # Check formatting (example: consistency in bullet points)
    if '•' in resume_text and '-' in resume_text:
        feedback.append("Consider using a consistent bullet point style throughout your resume.")

    # Check that at least one of the standard sections is present
    if not _SECTION_RE.search(resume_text):
        feedback.append("Make sure your resume includes sections like Experience, Education, and Skills.")

    # Extract keywords and check relevance
    if jd_keywords is None:
        jd_keywords = extract_keywords_tfidf(preprocess_text(job_description))
    resume_keywords = extract_keywords_tfidf(preprocess_text(resume_text))
    common_keywords = set(jd_keywords).intersection(resume_keywords)
    # A short job description may yield fewer keywords than the threshold;
    # never demand more matches than there are keywords to match.
    required_matches = min(MIN_COMMON_KEYWORDS, len(jd_keywords))
    if len(common_keywords) < required_matches:
        # Suggest only keywords the resume does not already contain.
        missing = [kw for kw in jd_keywords if kw not in common_keywords]
        feedback.append(
            "Your resume could better match the job description. "
            f"Consider adding keywords such as: {', '.join(missing[:5])}."
        )

    # Check for action verbs
    if not _ACTION_VERB_RE.search(resume_text):
        feedback.append("Consider using strong action verbs to describe your achievements and responsibilities.")

    # Check for a summary / objective statement
    if not re.search(r'\bsummary\b|\bobjective\b', resume_text, re.IGNORECASE):
        feedback.append(
            "Consider adding a professional summary or objective statement "
            "to provide a quick overview of your qualifications."
        )

    # Check for quantifiable achievements
    if not re.search(r'\d+', resume_text):
        feedback.append(
            "Include quantifiable achievements in your experience section "
            "(e.g., increased sales by 20%)."
        )

    # Provide positive feedback if none of the above conditions are met
    if not feedback:
        feedback.append(
            "Your resume is well-aligned with the job description. "
            "Ensure to keep it updated with relevant keywords and achievements."
        )

    return feedback


def tfidf_cosine_similarity(resume, jd):
    """Return the cosine similarity of the TF-IDF vectors of *resume* and *jd*.

    Returns 0.0 when neither text contains any usable term.
    """
    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform([resume, jd])
    except ValueError as exc:
        if not _is_empty_vocabulary_error(exc):
            raise
        return 0.0
    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    return float(cosine_sim[0][0])


def doc2vec_cosine_similarity(resume, jd, model):
    """Return the cosine similarity of the vectors *model* infers for both texts.

    Both texts are split on whitespace before inference.
    """
    resume_vector = model.infer_vector(resume.split())
    jd_vector = model.infer_vector(jd.split())
    cosine_sim = cosine_similarity([resume_vector], [jd_vector])
    return float(cosine_sim[0][0])


def extract_years_of_experience(text):
    """Return the sum of every "<number> year(s)" mention found in *text*.

    Every match is added up, so repeated mentions are counted repeatedly.
    Returns 0 when there is no match.
    """
    years = re.findall(r'(\d+)\s+year[s]*', text, re.IGNORECASE)
    return sum(map(int, years))


def extract_info_from_resumes(resume_files, job_description):
    """Score every resume in *resume_files* against *job_description*.

    Args:
        resume_files: Uploaded PDF resumes, given either as paths or as
            file-like objects exposing their path via ``name``.
        job_description: Raw text of the job description.

    Returns:
        One dict per resume with the keys ``Name``, ``Keyword_Match_Score``,
        ``TFIDF_Score``, ``Doc2Vec_Score``, ``Years_of_Experience`` and
        ``Feedback``.
    """
    # Parse each PDF once and reuse the text for both training and scoring.
    paths = [_resolve_path(resume_file) for resume_file in resume_files]
    raw_texts = [extract_text_from_pdf(path) for path in paths]
    clean_texts = [preprocess_text(text) for text in raw_texts]

    # The job description is the same for every resume: process it once.
    clean_jd = preprocess_text(job_description)
    jd_keywords = extract_keywords_tfidf(clean_jd)

    # Train Doc2Vec model on resumes and job description
    corpus = clean_texts + [clean_jd]
    tagged_docs = [TaggedDocument(doc.split(), [i]) for i, doc in enumerate(corpus)]
    # Without a single token there is no vocabulary to train on.
    if any(doc.words for doc in tagged_docs):
        model = Doc2Vec(tagged_docs, vector_size=50, window=2, min_count=1, workers=4)
    else:
        model = None

    data = []
    for path, text, preprocessed_text in zip(paths, raw_texts, clean_texts):
        resume_keywords = extract_keywords_tfidf(preprocessed_text)
        years_of_experience = extract_years_of_experience(text)

        # Append years of experience to the resume keywords
        # NOTE: the job-description keywords are single words, so this
        # multi-word entry does not contribute to the keyword match score.
        if years_of_experience > 0:
            resume_keywords.append(f"{years_of_experience} years experience")

        name = os.path.splitext(os.path.basename(path))[0]
        feedback = give_feedback(text, job_description, jd_keywords=jd_keywords)

        # Calculate scores
        common_keywords = set(jd_keywords).intersection(resume_keywords)
        keyword_match_score = len(common_keywords)  # Count of common keywords as a whole number
        tfidf_score = tfidf_cosine_similarity(text, job_description)
        if model is None:
            doc2vec_score = 0.0
        else:
            doc2vec_score = doc2vec_cosine_similarity(preprocessed_text, clean_jd, model)

        data.append({
            'Name': name,
            'Keyword_Match_Score': keyword_match_score,  # Whole number
            'TFIDF_Score': tfidf_score,
            'Doc2Vec_Score': doc2vec_score,
            'Years_of_Experience': years_of_experience,
            'Feedback': '; '.join(feedback),  # Combine feedback into a single string
        })

    return data


def save_to_excel(data, output_file):
    """Write *data* to *output_file* as an Excel sheet.

    Returns:
        *output_file* on success, or a string starting with
        ``"Error saving file: "`` when writing fails.
    """
    df = pd.DataFrame(data)
    try:
        df.to_excel(output_file, index=False)
    except Exception as e:
        return f"Error saving file: {e}"
    return output_file


def gradio_interface(resume_files, job_description):
    """Gradio callback: analyze the uploaded resumes and return the report path.

    Raises:
        gr.Error: When no resume was uploaded or the report could not be
            written. The output component expects a file path, so a failure
            is raised with its message instead of being returned as text.
    """
    if not resume_files:
        raise gr.Error("No resumes to process.")

    output_file = _default_output_path()
    resumes = extract_info_from_resumes(resume_files, job_description)
    result = save_to_excel(resumes, output_file)
    if result != output_file:
        # save_to_excel reports failures as a message string.
        raise gr.Error(result)
    return result


# Gradio UI setup
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Files(label="Upload multiple Resumes", type="filepath"),  # Accept multiple file uploads
        gr.Textbox(label="Job Description", lines=5, placeholder="Enter the job description here...")
    ],
    outputs=gr.File(label="Download Results"),  # Provide the output file
    description="Upload multiple resume PDFs and provide a job description to analyze the resumes and get an Excel file with the results."
)

# Launch the Gradio interface
iface.launch()