"""Streamlit app that analyses a resume PDF against a job description.

The app extracts the text of an uploaded PDF, answers free-form questions
about it with a BERT question-answering model, suggests skills and project
ideas derived from a job description using spaCy part-of-speech patterns,
and keeps a log of the questions asked during the session.
"""

import importlib
import os
import subprocess
import sys
from io import BytesIO

import pandas as pd
import PyPDF2
import spacy
import streamlit as st
import torch
from spacy.matcher import Matcher
from transformers import BertForQuestionAnswering, BertTokenizer

SPACY_MODEL_NAME = "en_core_web_sm"
QA_MODEL_NAME = "bert-large-uncased-whole-word-masking-finetuned-squad"
MAX_SUGGESTIONS = 5


def _cache_resource(func):
    """Wrap *func* with ``st.cache_resource`` when this Streamlit has it.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching the heavy models below are reloaded on each rerun.
    When the decorator is unavailable the function is returned unchanged,
    which is the original (uncached) behaviour.
    """
    decorator = getattr(st, "cache_resource", None)
    return func if decorator is None else decorator(func)


@_cache_resource
def _load_spacy_model():
    """Return the spaCy pipeline, downloading the model package if missing."""
    if not spacy.util.is_package(SPACY_MODEL_NAME):
        # Use the interpreter that is running this app: a bare "python" on
        # PATH may belong to a different environment.
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", SPACY_MODEL_NAME],
            check=True,
        )
        # Make the freshly installed package visible to the import system.
        importlib.invalidate_caches()
    return spacy.load(SPACY_MODEL_NAME)


@_cache_resource
def _load_qa_model():
    """Return the ``(model, tokenizer)`` pair used for question answering."""
    qa_model = BertForQuestionAnswering.from_pretrained(QA_MODEL_NAME)
    qa_model.eval()
    qa_tokenizer = BertTokenizer.from_pretrained(QA_MODEL_NAME)
    return qa_model, qa_tokenizer


# Load Spacy Model
nlp = _load_spacy_model()

# Load BERT Model for QA
model, tokenizer = _load_qa_model()


# Extract Text from PDF
def extract_text_from_pdf(uploaded_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        uploaded_file: A binary file-like object with a ``read()`` method
            that yields the PDF bytes.

    Returns:
        The text of all pages joined together. Pages for which the PDF
        library yields no text contribute an empty string.
    """
    # The stream may already have been consumed by an earlier read, so
    # rewind it when the object supports seeking.
    seekable = getattr(uploaded_file, "seekable", None)
    if callable(seekable) and seekable():
        uploaded_file.seek(0)
    pdf_reader = PyPDF2.PdfReader(BytesIO(uploaded_file.read()))
    # ``extract_text`` may give None/"" for image-only pages; guard against
    # it and join once instead of repeated string concatenation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


# Generate Answer from QA Model
def answer_question(question, context, model, tokenizer):
    """Extract the answer to *question* from *context* with a QA model.

    Args:
        question: The question text.
        context: The passage to search for the answer.
        model: A question-answering model whose output exposes
            ``start_logits`` and ``end_logits``.
        tokenizer: The tokenizer matching *model*.

    Returns:
        The answer span as a string, or ``""`` when the question or the
        context is empty or the predicted span is empty.
    """
    if not question or not context or not context.strip():
        return ""

    inputs = tokenizer.encode_plus(
        question,
        context,
        add_special_tokens=True,
        return_tensors="pt",
        # Only the context is truncated so the question is kept intact.
        truncation="only_second",
        max_length=512,
    )
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        outputs = model(**inputs, return_dict=True)

    answer_start = int(torch.argmax(outputs.start_logits))
    answer_end = int(torch.argmax(outputs.end_logits)) + 1
    if answer_end <= answer_start:
        # The predicted end precedes the start: there is no usable span.
        return ""

    input_ids = inputs["input_ids"].tolist()[0]
    # Drop special tokens such as [CLS]/[SEP] so they never leak into the
    # answer shown to the user.
    answer_tokens = tokenizer.convert_ids_to_tokens(
        input_ids[answer_start:answer_end], skip_special_tokens=True
    )
    return tokenizer.convert_tokens_to_string(answer_tokens)


# Extract Keywords for Resume Improvement
def extract_keywords_for_sections(text):
    """Collect skill and project keyword phrases from *text*.

    Skills are runs of one or more nouns or of one or more proper nouns.
    Project phrases are a verb followed by one or more nouns or proper
    nouns.

    Args:
        text: The text to analyse.

    Returns:
        A ``(skills_keywords, project_keywords)`` tuple of sets of strings.
    """
    doc = nlp(text)
    skills_keywords = set()
    project_keywords = set()

    # Define patterns for skills and project ideas
    skill_patterns = [
        [{"POS": "NOUN", "OP": "+"}],
        [{"POS": "PROPN", "OP": "+"}],
    ]
    project_patterns = [
        [{"POS": "VERB"}, {"POS": "NOUN", "OP": "+"}],
        [{"POS": "VERB"}, {"POS": "PROPN", "OP": "+"}],
    ]

    matcher = Matcher(nlp.vocab)
    matcher.add("SKILLS", skill_patterns)
    matcher.add("PROJECTS", project_patterns)

    for match_id, start, end in matcher(doc):
        label = nlp.vocab.strings[match_id]
        span_text = doc[start:end].text
        if label == "SKILLS":
            skills_keywords.add(span_text)
        elif label == "PROJECTS":
            project_keywords.add(span_text)

    return skills_keywords, project_keywords


# Suggest Resume Improvements
def suggest_resume_improvements(resume_text, job_description):
    """Build improvement suggestions from a job description.

    Args:
        resume_text: The text of the resume.
        job_description: The text of the job description.

    Returns:
        A ``(skill_suggestions, project_suggestions)`` tuple of lists, each
        holding at most ``MAX_SUGGESTIONS`` sentences. Skill suggestions
        cover job-description keywords that do not occur (ignoring case)
        in the resume text.
    """
    skills_keywords, project_keywords = extract_keywords_for_sections(job_description)

    # Lower-case the resume once rather than once per keyword.
    resume_lower = resume_text.lower()
    # Sets have no stable order; sort so the same inputs always yield the
    # same suggestions.
    missing_skills = sorted(
        kw for kw in skills_keywords if kw.lower() not in resume_lower
    )
    potential_projects = [
        f"Consider a project involving '{keyword}'."
        for keyword in sorted(project_keywords)
    ]

    skill_suggestions = [
        f"Consider highlighting your experience or skills related to '{keyword}'."
        for keyword in missing_skills[:MAX_SUGGESTIONS]
    ]
    project_suggestions = potential_projects[:MAX_SUGGESTIONS]
    return skill_suggestions, project_suggestions


# Analyze Matches between Resume and Job Description
def analyze_matches(resume_text, job_description):
    """Describe which skill keywords the resume shares with the job description.

    Args:
        resume_text: The text of the resume.
        job_description: The text of the job description.

    Returns:
        A sentence listing the shared keywords, or a sentence stating that
        there are none.
    """
    resume_keywords = extract_keywords_for_sections(resume_text)[0]
    job_desc_keywords = extract_keywords_for_sections(job_description)[0]
    matches = resume_keywords & job_desc_keywords
    if matches:
        # Sorted so the commentary is stable from one rerun to the next.
        commentary = (
            "Your resume matches the following keywords from the job "
            f"description: {', '.join(sorted(matches))}"
        )
    else:
        commentary = "There are no direct keyword matches between your resume and the job description."
    return commentary


# Initialize session state to store the log of QA pairs and satisfaction responses
if 'qa_log' not in st.session_state:
    st.session_state.qa_log = []

# Streamlit App Interface
st.title('Resume Enhancement and Analysis App')

# Resume PDF upload
uploaded_file = st.file_uploader("Upload your resume (PDF format):", type='pdf')
resume_text = ''
if uploaded_file is not None:
    resume_text = extract_text_from_pdf(uploaded_file)
    st.write("Resume Text:")
    st.write(resume_text)

# Question-Answer Functionality
user_question = st.text_input("Ask a question based on your resume:")
if user_question:
    if not resume_text.strip():
        # Without resume text the model would only see the question.
        st.warning("Upload a resume with extractable text before asking a question.")
    else:
        # The script reruns on every widget interaction. Reuse the previous
        # answer while the question and resume are unchanged, so the model
        # is not re-run and the same pair is not logged again.
        qa_key = (user_question, resume_text)
        if 'last_qa' in st.session_state and st.session_state.last_qa[0] == qa_key:
            answer = st.session_state.last_qa[1]
        else:
            answer = answer_question(user_question, resume_text, model, tokenizer)
            st.session_state.last_qa = (qa_key, answer)
            # Log the interaction
            st.session_state.qa_log.append({
                'Question': user_question,
                'Answer': answer,
                'Satisfaction': 'Pending'
            })
        st.write("Answer:")
        st.write(answer)

# Job Description Input for Resume Improvement
job_description = st.text_area("Input the job description here for resume improvement suggestions:")
if job_description:
    skill_suggestions, project_suggestions = suggest_resume_improvements(resume_text, job_description)

    st.write('Technical Skill Improvement Suggestions:')
    for suggestion in skill_suggestions:
        st.write(suggestion)

    st.write('Notable Project Ideas:')
    for suggestion in project_suggestions:
        st.write(suggestion)

    # Analyze Matches and Provide Commentary
    match_commentary = analyze_matches(resume_text, job_description)
    st.write("Match Commentary:")
    st.write(match_commentary)

# User Feedback and Interaction Log
if st.session_state.qa_log:
    st.write("Interaction Log:")
    for i, interaction in enumerate(st.session_state.qa_log):
        # Render the radio on every run. Hiding it once a value had been
        # stored froze each entry at the radio's default choice and left
        # the user no way to change the response.
        interaction['Satisfaction'] = st.radio(
            f'Are you satisfied with the answer to: "{interaction["Question"]}"?',
            ('Yes', 'No'),
            key=f'satisfaction_{i}',
        )

    log_df = pd.DataFrame(st.session_state.qa_log)
    st.dataframe(log_df)