import streamlit as st
from transformers import BertForQuestionAnswering, BertTokenizer
import torch
from io import BytesIO
import PyPDF2
import pandas as pd
import spacy
from spacy.matcher import Matcher
import os
# Download the spaCy model if it's not already present
if not spacy.util.is_package("en_core_web_sm"):
    os.system("python -m spacy download en_core_web_sm")
# Load the spaCy model
nlp = spacy.load("en_core_web_sm")
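# The small English pipeline provides the part-of-speech tags that the Matcher
# patterns further below rely on for keyword extraction.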
# Extract Text from PDF
def extract_text_from_pdf(uploaded_file):
    pdf_reader = PyPDF2.PdfReader(BytesIO(uploaded_file.read()))
    resume_text = ''
    for page in pdf_reader.pages:
        # extract_text() can return None for image-only pages
        resume_text += page.extract_text() or ''
    return resume_text
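# Note: PyPDF2's development has moved to the pypdf package; pypdf.PdfReader exposes
# the same interface if a migration is wanted later.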
# Load BERT Model for QA
model = BertForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
tokenizer = BertTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
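# Note: the whole script reruns on every Streamlit interaction, so the large SQuAD
# model (~1.3 GB) is reloaded each time; wrapping these loads in a function decorated
# with @st.cache_resource would avoid that (assumes a Streamlit version providing it).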
# Generate Answer from QA Model
def answer_question(question, context, model, tokenizer):
    inputs = tokenizer.encode_plus(
        question,
        context,
        add_special_tokens=True,
        return_tensors="pt",
        truncation="only_second",
        max_length=512,
    )
    with torch.no_grad():  # inference only, no gradients needed
        outputs = model(**inputs, return_dict=True)
    answer_start_scores = outputs.start_logits
    answer_end_scores = outputs.end_logits
    # Pick the most likely start and end token positions for the answer span
    answer_start = torch.argmax(answer_start_scores)
    answer_end = torch.argmax(answer_end_scores) + 1
    input_ids = inputs["input_ids"].tolist()[0]
    answer = tokenizer.convert_tokens_to_string(
        tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
    )
    return answer
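# Illustrative usage (assumes resume text has already been extracted):
#   answer_question("What programming languages are listed?", resume_text, model, tokenizer)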
# Extract Keywords for Resume Improvement
def extract_keywords_for_sections(text):
    doc = nlp(text)
    skills_keywords = set()
    project_keywords = set()
    # Define patterns for skills and project ideas
    skill_patterns = [[{"POS": "NOUN", "OP": "+"}], [{"POS": "PROPN", "OP": "+"}]]
    project_patterns = [[{"POS": "VERB"}, {"POS": "NOUN", "OP": "+"}], [{"POS": "VERB"}, {"POS": "PROPN", "OP": "+"}]]
    matcher = Matcher(nlp.vocab)
    matcher.add("SKILLS", skill_patterns)
    matcher.add("PROJECTS", project_patterns)
    for match_id, start, end in matcher(doc):
        span = doc[start:end]
        if nlp.vocab.strings[match_id] == "SKILLS":
            skills_keywords.add(span.text)
        elif nlp.vocab.strings[match_id] == "PROJECTS":
            project_keywords.add(span.text)
    return skills_keywords, project_keywords
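# Illustrative example: for the text "developed machine learning pipelines", the SKILLS
# patterns can match noun spans such as "machine learning pipelines", while the PROJECTS
# patterns can match verb+noun spans such as "developed machine learning pipelines".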
# Suggest Resume Improvements
def suggest_resume_improvements(resume_text, job_description):
    skills_keywords, project_keywords = extract_keywords_for_sections(job_description)
    missing_skills = [kw for kw in skills_keywords if kw.lower() not in resume_text.lower()]
    potential_projects = [f"Consider a project involving '{keyword}'." for keyword in project_keywords]
    skill_suggestions = [f"Consider highlighting your experience or skills related to '{keyword}'." for keyword in missing_skills[:5]]
    project_suggestions = potential_projects[:5]
    return skill_suggestions, project_suggestions
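# Example: if the job description mentions "Docker" but the resume text does not, the
# skill suggestions would include a prompt to highlight Docker experience (capped at five).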
# Analyze Matches between Resume and Job Description
def analyze_matches(resume_text, job_description):
    resume_keywords = set(extract_keywords_for_sections(resume_text)[0])
    job_desc_keywords = set(extract_keywords_for_sections(job_description)[0])
    matches = resume_keywords & job_desc_keywords
    if matches:
        commentary = f"Your resume matches the following keywords from the job description: {', '.join(matches)}"
    else:
        commentary = "There are no direct keyword matches between your resume and the job description."
    return commentary
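# Note: the overlap above is exact, case-sensitive string matching between the two
# keyword sets, so e.g. "Python" in the resume and "python" in the job description
# would not count as a match.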
# Initialize session state to store the log of QA pairs and satisfaction responses
if 'qa_log' not in st.session_state:
    st.session_state.qa_log = []
# Streamlit App Interface
st.title('Resume Enhancement and Analysis App')
# Resume PDF upload
uploaded_file = st.file_uploader("Upload your resume (PDF format):", type='pdf')
resume_text = ''
if uploaded_file is not None:
    resume_text = extract_text_from_pdf(uploaded_file)
    st.write("Resume Text:")
    st.write(resume_text)
# Question-Answer Functionality
user_question = st.text_input("Ask a question based on your resume:")
if user_question:
    answer = answer_question(user_question, resume_text, model, tokenizer)
    st.write("Answer:")
    st.write(answer)
    # Log the interaction
    st.session_state.qa_log.append({
        'Question': user_question,
        'Answer': answer,
        'Satisfaction': 'Pending'
    })
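# Note: Streamlit reruns the whole script on every widget interaction, so the block above
# can append duplicate log entries for the same question; deduplicating on the question
# text (or logging only when the question changes) would avoid that.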
# Job Description Input for Resume Improvement
job_description = st.text_area("Input the job description here for resume improvement suggestions:")
if job_description:
    skill_suggestions, project_suggestions = suggest_resume_improvements(resume_text, job_description)
    st.write('Technical Skill Improvement Suggestions:')
    for suggestion in skill_suggestions:
        st.write(suggestion)
    st.write('Notable Project Ideas:')
    for suggestion in project_suggestions:
        st.write(suggestion)
    # Analyze Matches and Provide Commentary
    match_commentary = analyze_matches(resume_text, job_description)
    st.write("Match Commentary:")
    st.write(match_commentary)
# User Feedback and Interaction Log
if st.session_state.qa_log:
    st.write("Interaction Log:")
    for i, interaction in enumerate(st.session_state.qa_log):
        if interaction['Satisfaction'] == 'Pending':
            satisfaction = st.radio(f'Are you satisfied with the answer to: "{interaction["Question"]}"?', ('Yes', 'No'), key=f'satisfaction_{i}')
            st.session_state.qa_log[i]['Satisfaction'] = satisfaction
    log_df = pd.DataFrame(st.session_state.qa_log)
    st.dataframe(log_df)
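# Note: st.radio returns its default option ("Yes") on first render, so Satisfaction is
# recorded immediately and the prompt disappears on the next rerun; adding a "Pending"
# option or a submit button would let users answer deliberately.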