# resumematcher/streamlit_second.py
# (listing header from the source export — branch "March", commit 46917c3;
# kept as a comment because the raw lines are not executable Python)
import json
from typing import List
import networkx as nx
import nltk
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from annotated_text import annotated_text, parameters
from streamlit_extras import add_vertical_space as avs
from streamlit_extras.badges import badge
from scripts.utils import get_filenames_from_dir
# --- One-time app setup -----------------------------------------------------
# st.set_page_config must be the first Streamlit command executed in the
# script, so it stays ahead of every other st.* call below.
st.set_page_config(
    page_title="Resume Matcher",
    page_icon="Assets/img/favicon.ico",
    initial_sidebar_state="auto",
)
# Check if NLTK punkt_tab data is available, if not, download it
# (nltk.word_tokenize used later needs the "punkt_tab" tokenizer models).
try:
    nltk.data.find("tokenizers/punkt_tab")
except LookupError:
    nltk.download("punkt_tab")
# Global styling for annotated_text() highlights: no label separator,
# slightly rounded corners, compact padding.
parameters.SHOW_LABEL_SEPARATOR = False
parameters.BORDER_RADIUS = 3
parameters.PADDING = "0.5 0.25rem"
def create_star_graph(nodes_and_weights, title):
    """Render a star-shaped graph in Streamlit: a central 'resume' hub
    connected to one node per (label, weight) pair.

    Args:
        nodes_and_weights: iterable of (label, weight) pairs; weight is
            presumably a 0-1 importance score (it is scaled x100 for the
            layout) — TODO confirm against the keyterm extractor.
        title: chart title displayed above the figure.

    Side effects:
        Renders the Plotly figure via st.plotly_chart().
    """
    # Build the star topology: every node hangs off the central "resume" hub.
    G = nx.Graph()
    central_node = "resume"
    G.add_node(central_node)
    for node, weight in nodes_and_weights:
        G.add_node(node)
        # spring_layout reads the 'weight' edge attribute by default; scale it
        # up so node separation reflects keyterm importance.
        G.add_edge(central_node, node, weight=weight * 100)

    # Force-directed positions for all nodes.
    pos = nx.spring_layout(G)

    # Edge trace: each edge contributes (x0, x1, None) so Plotly draws
    # disconnected segments within a single scatter trace.
    edge_x = []
    edge_y = []
    for source, target in G.edges():
        x0, y0 = pos[source]
        x1, y1 = pos[target]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])
    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        line=dict(width=0.5, color="#888"),
        hoverinfo="none",
        mode="lines",
    )

    # Node trace: markers colored by connection count (filled in below).
    node_x = [pos[node][0] for node in G.nodes()]
    node_y = [pos[node][1] for node in G.nodes()]
    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        mode="markers",
        hoverinfo="text",
        marker=dict(
            showscale=True,
            colorscale="Rainbow",
            reversescale=True,
            color=[],
            size=10,
            colorbar=dict(
                thickness=15,
                # Nested title form: the flat 'titleside' attribute is
                # deprecated and removed in newer Plotly releases.
                title=dict(text="Node Connections", side="right"),
                xanchor="left",
            ),
            line_width=2,
        ),
    )

    # Color node points by number of connections (node degree).
    node_adjacencies = []
    node_text = []
    for node in G.nodes():
        degree = G.degree(node)
        node_adjacencies.append(degree)
        node_text.append(f"{node}<br># of connections: {degree}")
    node_trace.marker.color = node_adjacencies
    node_trace.text = node_text

    # Assemble the figure; axes are hidden because positions are abstract.
    fig = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            # 'titlefont_size' is deprecated; set the size via title.font.
            title=dict(text=title, font=dict(size=16)),
            showlegend=False,
            hovermode="closest",
            margin=dict(b=20, l=5, r=5, t=40),
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        ),
    )
    # Show the figure
    st.plotly_chart(fig)
def create_annotated_text(
    input_string: str, word_list: List[str], annotation: str, color_code: str
):
    """Tokenize *input_string* and tag every token found in *word_list*.

    Args:
        input_string: raw text to tokenize with nltk.word_tokenize.
        word_list: tokens to highlight; matching is exact and case-sensitive.
        annotation: label shown on each highlighted token.
        color_code: highlight color (hex string).

    Returns:
        A list suitable for annotated_text(): plain ``str`` for unmatched
        tokens and ``(token, annotation, color_code)`` tuples for matches.
    """
    tokens = nltk.word_tokenize(input_string)
    # Convert the list to a set for O(1) membership tests.
    word_set = set(word_list)
    # Renamed from 'annotated_text': the old name shadowed the imported
    # annotated_text() helper inside this function.
    result = []
    for token in tokens:
        if token in word_set:
            # Matched keyword: emit an annotation tuple.
            result.append((token, annotation, color_code))
        else:
            # Plain token: keep as a bare string.
            result.append(token)
    return result
def read_json(filename):
    """Load and return the parsed contents of a JSON file.

    Args:
        filename: path to the JSON file.

    Returns:
        The deserialized Python object (typically a dict here).

    Raises:
        OSError / json.JSONDecodeError on a missing or malformed file.
    """
    # JSON is UTF-8 by specification; be explicit rather than relying on
    # the platform's default encoding.
    with open(filename, encoding="utf-8") as f:
        return json.load(f)
def tokenize_string(input_string):
    """Split *input_string* into word tokens via NLTK's default tokenizer."""
    return nltk.word_tokenize(input_string)
# Display the main title (also the target of the "Back to Top" anchor).
st.title(":blue[Resume Matcher]")

# Sidebar: project branding, links and support badges.
with st.sidebar:
    st.image("Assets/img/header_image.png")
    st.subheader(
        "Free and Open Source ATS to help your resume pass the screening stage."
    )
    st.markdown(
        "Check the website [www.resumematcher.fyi](https://www.resumematcher.fyi/)"
    )
    st.markdown(
        "Give Resume Matcher a ⭐ on [GitHub](https://github.com/srbhr/resume-matcher)"
    )
    badge(type="github", name="srbhr/Resume-Matcher")
    st.markdown("For updates follow me on Twitter.")
    badge(type="twitter", name="_srbhr_")
    st.markdown(
        "If you like the project and would like to further help in development please consider 👇"
    )
    badge(type="buymeacoffee", name="srbhr")

# NOTE(review): indentation was lost in this listing; the divider and
# everything below are assumed to render in the main page, not inside the
# sidebar — confirm against the original file.
st.divider()
avs.add_vertical_space(1)

# Let the user pick one of the pre-processed resumes on disk.
resume_names = get_filenames_from_dir("Data/Processed/Resumes")
output = st.selectbox(
    f"There are {len(resume_names)} resumes present. Please select one from the menu below:",
    resume_names,
)
avs.add_vertical_space(5)
# Load the processed resume JSON; 'output' is rebound later by the JD picker.
selected_file = read_json("Data/Processed/Resumes/" + output)
avs.add_vertical_space(2)
# --- Parsed resume view -----------------------------------------------------
st.markdown("#### Parsed Resume Data")
st.caption(
    "This text is parsed from your resume. This is how it'll look like after getting parsed by an ATS."
)
st.caption("Utilize this to understand how to make your resume ATS friendly.")
avs.add_vertical_space(3)
# st.json(selected_file)
st.write(selected_file["clean_data"])
avs.add_vertical_space(3)

# Highlight the resume's own extracted keywords ("KW", teal) in its text.
st.write("Now let's take a look at the extracted keywords from the resume.")
annotated_text(
    create_annotated_text(
        selected_file["clean_data"],
        selected_file["extracted_keywords"],
        "KW",
        "#0B666A",
    )
)
avs.add_vertical_space(5)

# Star graph of the resume's keyterms around a central "resume" node.
st.write("Now let's take a look at the extracted entities from the resume.")
# Call the function with your data
create_star_graph(selected_file["keyterms"], "Entities from Resume")

# 'keyterms' is a list of (keyword, value) pairs; note 'df2' is rebound
# further down for the job-description section.
df2 = pd.DataFrame(selected_file["keyterms"], columns=["keyword", "value"])
# Create the dictionary: keyword -> value scaled to a 0-100 range.
keyword_dict = {}
for keyword, value in selected_file["keyterms"]:
    keyword_dict[keyword] = value * 100

# Tabular view of the keyterm scores.
fig = go.Figure(
    data=[
        go.Table(
            header=dict(
                values=["Keyword", "Value"], font=dict(size=12), fill_color="#070A52"
            ),
            cells=dict(
                values=[list(keyword_dict.keys()), list(keyword_dict.values())],
                line_color="darkslategray",
                fill_color="#6DA9E4",
            ),
        )
    ]
)
st.plotly_chart(fig)
st.divider()

# Treemap: tile area proportional to each keyterm's value.
fig = px.treemap(
    df2,
    path=["keyword"],
    values="value",
    color_continuous_scale="Rainbow",
    title="Key Terms/Topics Extracted from your Resume",
)
st.write(fig)
avs.add_vertical_space(5)
# --- Job-description selection ----------------------------------------------
job_descriptions = get_filenames_from_dir("Data/Processed/JobDescription")
# 'output' is rebound here from the resume picker above.
output = st.selectbox(
    f"There are {len(job_descriptions)} job descriptions present. Please select one from the menu below:",
    job_descriptions,
)
avs.add_vertical_space(5)
selected_jd = read_json("Data/Processed/JobDescription/" + output)
avs.add_vertical_space(2)
st.markdown("#### Job Description")
st.caption(
    "Currently in the pipeline I'm parsing this from PDF but it'll be from txt or copy paste."
)
avs.add_vertical_space(3)
# st.json(selected_file)
st.write(selected_jd["clean_data"])

# Highlight the JD's keywords ("JD", red) inside the *resume* text to show
# the overlap between the two documents.
st.markdown("#### Common Words between Job Description and Resumes Highlighted.")
annotated_text(
    create_annotated_text(
        selected_file["clean_data"], selected_jd["extracted_keywords"], "JD", "#F24C3D"
    )
)
# --- Job-description keyterms -----------------------------------------------
st.write("Now let's take a look at the extracted entities from the job description.")
# Call the function with your data
create_star_graph(selected_jd["keyterms"], "Entities from Job Description")

# Same table + treemap treatment as the resume section, now for the JD
# (rebinds 'df2', 'keyword_dict' and 'fig' from above).
df2 = pd.DataFrame(selected_jd["keyterms"], columns=["keyword", "value"])
# Create the dictionary: keyword -> value scaled to a 0-100 range.
keyword_dict = {}
for keyword, value in selected_jd["keyterms"]:
    keyword_dict[keyword] = value * 100
fig = go.Figure(
    data=[
        go.Table(
            header=dict(
                values=["Keyword", "Value"], font=dict(size=12), fill_color="#070A52"
            ),
            cells=dict(
                values=[list(keyword_dict.keys()), list(keyword_dict.values())],
                line_color="darkslategray",
                fill_color="#6DA9E4",
            ),
        )
    ]
)
st.plotly_chart(fig)
st.divider()
fig = px.treemap(
    df2,
    path=["keyword"],
    values="value",
    color_continuous_scale="Rainbow",
    title="Key Terms/Topics Extracted from the selected Job Description",
)
st.write(fig)
avs.add_vertical_space(5)
st.divider()

# --- Pre-computed vector-similarity section ---------------------------------
st.markdown("## Vector Similarity Scores")
st.caption("Powered by Qdrant Vector Search")
st.info("These are pre-computed queries", icon="ℹ")
st.warning(
    "Running Qdrant or Sentence Transformers without having capacity is not recommended",
    icon="⚠",
)
# Pre-computed Qdrant vector-similarity scores: each entry pairs a resume
# snippet with a job-description query and the cosine-style score.
# NOTE(review): the "text" values look like truncated reprs of a
# "{'resume': ...}" payload — kept verbatim from the original export.
data = [
    {
        "text": "{'resume': 'Alfred Pennyworth",
        "query": "Job Description Product Manager",
        "score": 0.62658,
    },
    {
        "text": "{'resume': 'Barry Allen",
        "query": "Job Description Product Manager",
        "score": 0.43777737,
    },
    {
        "text": "{'resume': 'Bruce Wayne ",
        "query": "Job Description Product Manager",
        "score": 0.39835533,
    },
    {
        "text": "{'resume': 'JOHN DOE",
        "query": "Job Description Product Manager",
        "score": 0.3915512,
    },
    {
        "text": "{'resume': 'Harvey Dent",
        "query": "Job Description Product Manager",
        "score": 0.3519544,
    },
    {
        "text": "{'resume': 'Barry Allen",
        "query": "Job Description Senior Full Stack Engineer",
        "score": 0.6541866,
    },
    {
        "text": "{'resume': 'Alfred Pennyworth",
        "query": "Job Description Senior Full Stack Engineer",
        "score": 0.59806436,
    },
    {
        "text": "{'resume': 'JOHN DOE",
        "query": "Job Description Senior Full Stack Engineer",
        "score": 0.5951386,
    },
    {
        "text": "{'resume': 'Bruce Wayne ",
        "query": "Job Description Senior Full Stack Engineer",
        "score": 0.57700855,
    },
    {
        "text": "{'resume': 'Harvey Dent",
        "query": "Job Description Senior Full Stack Engineer",
        "score": 0.38489106,
    },
    {
        "text": "{'resume': 'Barry Allen",
        "query": "Job Description Front End Engineer",
        "score": 0.76813436,
    },
    {
        "text": "{'resume': 'Bruce Wayne'",
        "query": "Job Description Front End Engineer",
        "score": 0.60440844,
    },
    {
        "text": "{'resume': 'JOHN DOE",
        "query": "Job Description Front End Engineer",
        "score": 0.56080043,
    },
    {
        "text": "{'resume': 'Alfred Pennyworth",
        "query": "Job Description Front End Engineer",
        "score": 0.5395049,
    },
    {
        "text": "{'resume': 'Harvey Dent",
        "query": "Job Description Front End Engineer",
        "score": 0.3859515,
    },
    {
        "text": "{'resume': 'JOHN DOE",
        "query": "Job Description Java Developer",
        "score": 0.5449441,
    },
    {
        "text": "{'resume': 'Alfred Pennyworth",
        "query": "Job Description Java Developer",
        "score": 0.53476423,
    },
    {
        "text": "{'resume': 'Barry Allen",
        "query": "Job Description Java Developer",
        "score": 0.5313871,
    },
    {
        "text": "{'resume': 'Bruce Wayne ",
        "query": "Job Description Java Developer",
        "score": 0.44446343,
    },
    {
        "text": "{'resume': 'Harvey Dent",
        "query": "Job Description Java Developer",
        "score": 0.3616274,
    },
]

# Collect everything into one DataFrame.
df = pd.DataFrame(data)


def _scores_for_query(frame, query):
    """Return rows of *frame* matching *query*, sorted by descending score."""
    return frame[frame["query"] == query].sort_values(by="score", ascending=False)


# One DataFrame per pre-computed query (previously four copy-pasted
# filter/sort blocks), highest score first, consumed by plot_df() below.
df1 = _scores_for_query(df, "Job Description Product Manager")
df2 = _scores_for_query(df, "Job Description Senior Full Stack Engineer")
df3 = _scores_for_query(df, "Job Description Front End Engineer")
df4 = _scores_for_query(df, "Job Description Java Developer")
def plot_df(df, title):
    """Render a bar chart of similarity scores (scaled to 0-100) in Streamlit.

    Expects a DataFrame with "text" (x-axis labels) and "score" columns.
    """
    scores_pct = df["score"] * 100
    figure = px.bar(df, x="text", y=scores_pct, title=title)
    st.plotly_chart(figure)
# Legend mapping each sample resume owner to their actual role.
st.markdown("### Bar plots of scores based on similarity to Job Description.")
st.subheader(":blue[Legend]")
st.text("Alfred Pennyworth : Product Manager")
st.text("Barry Allen : Front End Developer")
st.text("Harvey Dent : Machine Learning Engineer")
st.text("Bruce Wayne : Fullstack Developer (MERN)")
st.text("John Doe : Fullstack Developer (Java)")

# One bar chart per pre-computed query. NOTE(review): the titles appear
# clipped mid-word ("Exper", "Expe"...) — presumably truncated query strings
# from the original export; confirm before "fixing" them.
plot_df(df1, "Job Description Product Manager 10+ Years of Exper")
plot_df(df2, "Job Description Senior Full Stack Engineer 5+ Year")
plot_df(df3, "Job Description Front End Engineer 2 Years of Expe")
plot_df(df4, "Job Description Java Developer 3 Years of Experien")
avs.add_vertical_space(3)

# Go back to top — Streamlit derives the "#resume-matcher" anchor from the
# st.title text at the top of the page.
st.markdown("[:arrow_up: Back to Top](#resume-matcher)")