import streamlit as st
from transformers import BertForQuestionAnswering, BertTokenizer
import torch
from io import BytesIO
import PyPDF2
import pandas as pd
import spacy
from spacy.matcher import Matcher
import os
# Download the spaCy model if it's not already present
if not spacy.util.is_package("en_core_web_sm"):
    os.system("python -m spacy download en_core_web_sm")
# Load the spaCy model
nlp = spacy.load("en_core_web_sm")
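# The small English pipeline provides the part-of-speech tags that the Matcher
# patterns further below rely on for keyword extraction.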
# Extract Text from PDF
def extract_text_from_pdf(uploaded_file):
    pdf_reader = PyPDF2.PdfReader(BytesIO(uploaded_file.read()))
    resume_text = ''
    for page in pdf_reader.pages:
        # extract_text() can return None for image-only pages
        resume_text += page.extract_text() or ''
    return resume_text
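# Note: PyPDF2's development has moved to the pypdf package; pypdf.PdfReader exposes
# the same interface if a migration is wanted later.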
# Load BERT Model for QA
model = BertForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
tokenizer = BertTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
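# Note: the whole script reruns on every Streamlit interaction, so the large SQuAD
# model (~1.3 GB) is reloaded each time; wrapping these loads in a function decorated
# with @st.cache_resource would avoid that (assumes a Streamlit version providing it).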
# Generate Answer from QA Model
def answer_question(question, context, model, tokenizer):
    inputs = tokenizer.encode_plus(
        question,
        context,
        add_special_tokens=True,
        return_tensors="pt",
        truncation="only_second",
        max_length=512,
    )
    with torch.no_grad():  # inference only, no gradients needed
        outputs = model(**inputs, return_dict=True)
    answer_start_scores = outputs.start_logits
    answer_end_scores = outputs.end_logits
    # Pick the most likely start and end token positions for the answer span
    answer_start = torch.argmax(answer_start_scores)
    answer_end = torch.argmax(answer_end_scores) + 1
    input_ids = inputs["input_ids"].tolist()[0]
    answer = tokenizer.convert_tokens_to_string(
        tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
    )
    return answer
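# Illustrative usage (assumes resume text has already been extracted):
#   answer_question("What programming languages are listed?", resume_text, model, tokenizer)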
# Extract Keywords for Resume Improvement
def extract_keywords_for_sections(text):
    doc = nlp(text)
    skills_keywords = set()
    project_keywords = set()
    # Define patterns for skills and project ideas
    skill_patterns = [[{"POS": "NOUN", "OP": "+"}], [{"POS": "PROPN", "OP": "+"}]]
    project_patterns = [[{"POS": "VERB"}, {"POS": "NOUN", "OP": "+"}], [{"POS": "VERB"}, {"POS": "PROPN", "OP": "+"}]]
    matcher = Matcher(nlp.vocab)
    matcher.add("SKILLS", skill_patterns)
    matcher.add("PROJECTS", project_patterns)
    for match_id, start, end in matcher(doc):
        span = doc[start:end]
        if nlp.vocab.strings[match_id] == "SKILLS":
            skills_keywords.add(span.text)
        elif nlp.vocab.strings[match_id] == "PROJECTS":
            project_keywords.add(span.text)
    return skills_keywords, project_keywords
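# Illustrative example: for the text "developed machine learning pipelines", the SKILLS
# patterns can match noun spans such as "machine learning pipelines", while the PROJECTS
# patterns can match verb+noun spans such as "developed machine learning pipelines".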
# Suggest Resume Improvements
def suggest_resume_improvements(resume_text, job_description):
    skills_keywords, project_keywords = extract_keywords_for_sections(job_description)
    missing_skills = [kw for kw in skills_keywords if kw.lower() not in resume_text.lower()]
    potential_projects = [f"Consider a project involving '{keyword}'." for keyword in project_keywords]
    skill_suggestions = [f"Consider highlighting your experience or skills related to '{keyword}'." for keyword in missing_skills[:5]]
    project_suggestions = potential_projects[:5]
    return skill_suggestions, project_suggestions
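# Example: if the job description mentions "Docker" but the resume text does not, the
# skill suggestions would include a prompt to highlight Docker experience (capped at five).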
# Analyze Matches between Resume and Job Description
def analyze_matches(resume_text, job_description):
    resume_keywords = set(extract_keywords_for_sections(resume_text)[0])
    job_desc_keywords = set(extract_keywords_for_sections(job_description)[0])
    matches = resume_keywords & job_desc_keywords
    if matches:
        commentary = f"Your resume matches the following keywords from the job description: {', '.join(matches)}"
    else:
        commentary = "There are no direct keyword matches between your resume and the job description."
    return commentary
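# Note: the overlap above is exact, case-sensitive string matching between the two
# keyword sets, so e.g. "Python" in the resume and "python" in the job description
# would not count as a match.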
# Initialize session state to store the log of QA pairs and satisfaction responses
if 'qa_log' not in st.session_state:
    st.session_state.qa_log = []
# Streamlit App Interface
st.title('Resume Enhancement and Analysis App')
# Resume PDF upload
uploaded_file = st.file_uploader("Upload your resume (PDF format):", type='pdf')
resume_text = ''
if uploaded_file is not None:
    resume_text = extract_text_from_pdf(uploaded_file)
    st.write("Resume Text:")
    st.write(resume_text)
# Question-Answer Functionality
user_question = st.text_input("Ask a question based on your resume:")
if user_question:
    answer = answer_question(user_question, resume_text, model, tokenizer)
    st.write("Answer:")
    st.write(answer)
    # Log the interaction
    st.session_state.qa_log.append({
        'Question': user_question,
        'Answer': answer,
        'Satisfaction': 'Pending'
    })
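# Note: Streamlit reruns the whole script on every widget interaction, so the block above
# can append duplicate log entries for the same question; deduplicating on the question
# text (or logging only when the question changes) would avoid that.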
# Job Description Input for Resume Improvement
job_description = st.text_area("Input the job description here for resume improvement suggestions:")
if job_description:
    skill_suggestions, project_suggestions = suggest_resume_improvements(resume_text, job_description)
    st.write('Technical Skill Improvement Suggestions:')
    for suggestion in skill_suggestions:
        st.write(suggestion)
    st.write('Notable Project Ideas:')
    for suggestion in project_suggestions:
        st.write(suggestion)
    # Analyze Matches and Provide Commentary
    match_commentary = analyze_matches(resume_text, job_description)
    st.write("Match Commentary:")
    st.write(match_commentary)
# User Feedback and Interaction Log
if st.session_state.qa_log:
    st.write("Interaction Log:")
    for i, interaction in enumerate(st.session_state.qa_log):
        if interaction['Satisfaction'] == 'Pending':
            satisfaction = st.radio(f'Are you satisfied with the answer to: "{interaction["Question"]}"?', ('Yes', 'No'), key=f'satisfaction_{i}')
            st.session_state.qa_log[i]['Satisfaction'] = satisfaction
    log_df = pd.DataFrame(st.session_state.qa_log)
    st.dataframe(log_df)
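# Note: st.radio returns its default option ("Yes") on first render, so Satisfaction is
# recorded immediately and the prompt disappears on the next rerun; adding a "Pending"
# option or a submit button would let users answer deliberately.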