# Match / app.py
# Unknown92's picture
# Update app.py
# 18bf236
import streamlit as st
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from keyphrasetransformer import KeyPhraseTransformer
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
import numpy as np
import pandas as pd
# Key-phrase extractor; used further down to pull keywords out of the
# job description and the resume.
kp = KeyPhraseTransformer()


@st.cache_resource
def load_model():
    """Load the sentence-embedding model used for JD/resume similarity.

    The app calls ``load_model()`` when building the page, so this function
    must exist at module level. It is wrapped in ``st.cache_resource`` so
    the model is created once and reused instead of being reloaded each
    time the script is rerun.

    Returns:
        SentenceTransformer: the ``all-MiniLM-L6-v2`` model.
    """
    return SentenceTransformer('all-MiniLM-L6-v2')
#---------------------
# Prepare and tokenize dataset
# NOTE(review): these statements execute at module level, i.e. every time
# the script is run -- confirm that is intended for a Streamlit app.
dataset = load_dataset("Unknown92/Resume_dataset")
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")


def tokenize_function(examples):
    """Tokenize the "Resume" column of a batch with the module-level tokenizer.

    Uses the ``max_length`` padding strategy and enables truncation.

    Args:
        examples: a batch from the dataset; only its "Resume" entry is read.

    Returns:
        Whatever the tokenizer returns for that batch of texts.
    """
    resume_texts = examples["Resume"]
    return tokenizer(resume_texts, padding="max_length", truncation=True)


tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Fixed-seed shuffle, then keep the first 200 rows of each split.
# NOTE(review): assumes the splits are named "Train"/"Test" and hold at
# least 200 rows each -- verify against the dataset.
small_train_dataset = (
    tokenized_datasets["Train"]
    .shuffle(seed=42)
    .select(range(200))
)
small_eval_dataset = (
    tokenized_datasets["Test"]
    .shuffle(seed=42)
    .select(range(200))
)
# Sequence classifier with a 5-way classification head.
# NOTE(review): `model` is rebound further down by `model = load_model()`,
# so the fine-tuned classifier is not the object used for similarity scoring.
model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=5)
# NOTE(review): newer transformers releases rename `evaluation_strategy`
# to `eval_strategy` -- confirm against the pinned version.
training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch")


def compute_metrics(eval_pred):
    """Return classification accuracy for one evaluation pass.

    ``Trainer`` is given this callback below; without a definition the
    ``Trainer(...)`` call fails with a NameError.

    Args:
        eval_pred: the ``(logits, labels)`` pair handed to the callback.

    Returns:
        dict: ``{"accuracy": fraction of rows whose arg-max logit equals
        the label}``.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": float(np.mean(predictions == labels))}


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
)
trainer.train()
#---------------------
def calculate_similarity(model, text1, text2):
    """Return the cosine similarity between the embeddings of two texts.

    Args:
        model: object exposing ``encode(list_of_texts)``.
        text1: first text.
        text2: second text.

    Returns:
        The single entry of the 1x1 cosine-similarity matrix.
    """
    # Each text is encoded on its own, as a one-element batch, text1 first.
    first_vec, second_vec = (model.encode([text]) for text in (text1, text2))
    similarity_matrix = cosine_similarity(first_vec, second_vec)
    return similarity_matrix[0][0]
def generate_wordcloud(text, title):
    """Render a word cloud of ``text`` in the Streamlit page.

    Args:
        text: the words to plot, as a single string.
        title: title drawn above the image.

    Returns:
        None. If ``text`` yields no words to plot, an informational message
        is shown instead of the image.
    """
    try:
        wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    except ValueError:
        # WordCloud raises ValueError when there is not a single word to draw.
        st.info(f"No keywords to display for: {title}")
        return

    # Draw on an explicit figure rather than pyplot's global state, and
    # close it afterwards so figures do not pile up across reruns.
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        ax.set_title(title)
        st.pyplot(fig)
    finally:
        plt.close(fig)
# Page-level configuration for the app.
st.set_page_config(
    page_title="Resume Keyword Identifier",
    page_icon="+",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.title("Resume Match Calculator")
model = load_model()

# Set the font size for the "Paste the Job Description" text
st.markdown("<style>#fc1{font-size: 20px !important;}</style>", unsafe_allow_html=True)
jd = st.text_area("Paste the Job Description:", height=100)
resume = st.text_area("Paste Your Resume:", height=100)

if st.button("Calculate Match Score"):
    # Treat whitespace-only input as missing: it carries no text to score
    # or to extract key phrases from.
    if jd.strip() and resume.strip():
        score = calculate_similarity(model, jd, resume)
        jp = kp.get_key_phrases(jd)
        rp = kp.get_key_phrases(resume)

        # Key phrases found in the JD but not in the resume. Sorted so the
        # list is displayed in a stable order rather than arbitrary set order.
        missing_keywords = sorted(set(jp) - set(rp))

        # Generate word clouds for JD and Resume
        generate_wordcloud(' '.join(jp), 'Word Cloud for JD Keywords')
        generate_wordcloud(' '.join(rp), 'Word Cloud for Resume Keywords')

        st.write("The match score is:")
        st.write(score)
        st.write("JD Keywords:")
        st.write(jp)
        st.write("Resume Keywords:")
        st.write(rp)
        st.write("Missing Keywords in Resume:")
        st.write(missing_keywords)
    else:
        st.write("Please enter both the job description and resume.")