# job-fair/util/plot.py
# Last commit: "update" (5af5671) by Zekun Wu
# File-viewer residue: raw / history / blame, 5.19 kB
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.express as px
def create_score_plot(df):
    """Build a line-and-marker chart of the three average-score columns.

    One trace is added per score column (``Privilege_Avg_Score``,
    ``Protect_Avg_Score``, ``Neutral_Avg_Score``), each plotted against
    ``df.index``. Hovering a point shows the row's ``Role`` and its y value.

    Args:
        df: DataFrame holding the three score columns and a ``Role`` column.

    Returns:
        A ``plotly.graph_objs.Figure`` containing three scatter traces.
    """
    # (source column, legend label) pairs, in the order the traces are added.
    trace_specs = (
        ('Privilege_Avg_Score', 'Privilege'),
        ('Protect_Avg_Score', 'Protection'),
        ('Neutral_Avg_Score', 'Neutral'),
    )

    fig = go.Figure()
    for column, label in trace_specs:
        trace = go.Scatter(
            x=df.index,
            y=df[column],
            mode='lines+markers',
            name=label,
            text=df['Role'],
            hoverinfo='text+y',
        )
        fig.add_trace(trace)

    layout_options = {
        'title': 'Scores of Resumes',
        'xaxis_title': 'Resume Index',
        'yaxis_title': 'Score',
        'legend_title': 'Score Type',
        'hovermode': 'closest',
    }
    fig.update_layout(**layout_options)
    return fig
def create_rank_plots(df):
    """Build a line-and-marker chart of the three rank columns.

    One trace is added per rank column (``Privilege_Rank``, ``Protect_Rank``,
    ``Neutral_Rank``), each plotted against ``df.index``. Hovering a point
    shows the row's ``Role`` and its y value.

    Args:
        df: DataFrame holding the three rank columns and a ``Role`` column.

    Returns:
        A ``plotly.graph_objs.Figure`` containing three scatter traces.
    """
    # (source column, legend label) pairs, in the order the traces are added.
    trace_specs = (
        ('Privilege_Rank', 'Privilege'),
        ('Protect_Rank', 'Protection'),
        ('Neutral_Rank', 'Neutral'),
    )

    fig = go.Figure()
    for column, label in trace_specs:
        trace = go.Scatter(
            x=df.index,
            y=df[column],
            mode='lines+markers',
            name=label,
            text=df['Role'],
            hoverinfo='text+y',
        )
        fig.add_trace(trace)

    layout_options = {
        'title': 'Ranks of Scores',
        'xaxis_title': 'Resume Index',
        'yaxis_title': 'Rank',
        'legend_title': 'Rank Type',
        'hovermode': 'closest',
    }
    fig.update_layout(**layout_options)
    return fig
def create_correlation_heatmaps(df):
    """Create one correlation heatmap per (column group, method) combination.

    Two column groups are used -- the three ``*_Avg_Score`` columns and the
    three ``*_Rank`` columns -- and each is correlated with the Pearson,
    Spearman and Kendall methods of ``DataFrame.corr``.

    Args:
        df: DataFrame holding the score and rank columns.

    Returns:
        A dict mapping a title such as ``'Scores Pearson Correlation'`` to the
        ``plotly.express.imshow`` figure of that correlation matrix. Keys are
        inserted method by method, with "Scores" before "Ranks".
    """
    # Select both column groups up front so a missing column fails before
    # any correlation is computed.
    column_groups = (
        ('Scores', df[['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']]),
        ('Ranks', df[['Privilege_Rank', 'Protect_Rank', 'Neutral_Rank']]),
    )
    # (title word, pandas method name) pairs.
    methods = (
        ('Pearson', 'pearson'),
        ('Spearman', 'spearman'),
        ('Kendall', 'kendall'),
    )

    figs = {}
    for method_label, method_name in methods:
        for group_label, frame in column_groups:
            title = f'{group_label} {method_label} Correlation'
            matrix = frame.corr(method=method_name)
            figs[title] = px.imshow(matrix, text_auto=True, title=title)
    return figs
def point_to_line_distance(point, A, B):
    """Calculate the distance from a point to the line through A and B.

    The line is treated as infinite: the projection of ``point`` onto it is
    not clamped to the segment between ``A`` and ``B``.

    Args:
        point: Coordinates of the point (array-like of numbers).
        A: Coordinates of the first point defining the line.
        B: Coordinates of the second point defining the line.

    Returns:
        The Euclidean distance from ``point`` to the line. When ``A`` and
        ``B`` coincide the line is undefined, so the distance from ``point``
        to ``A`` is returned instead of NaN.
    """
    # Accept plain sequences as well as arrays, and force float arithmetic.
    origin = np.asarray(A, dtype=float)
    line_vec = np.asarray(B, dtype=float) - origin
    point_vec = np.asarray(point, dtype=float) - origin

    line_len_sq = np.dot(line_vec, line_vec)
    if line_len_sq == 0:
        # Degenerate line (A == B): dividing by the zero length would give
        # NaN, so fall back to the distance to that single point.
        return np.linalg.norm(point_vec)

    # Scalar projection of point_vec onto the line, in units of |B - A|.
    t = np.dot(line_vec, point_vec) / line_len_sq
    nearest = line_vec * t
    return np.linalg.norm(nearest - point_vec)
def calculate_distances(data, point_A, point_B):
    """Compute each row's distance to the line through point_A and point_B.

    Every row is read as the 3-D point
    ``(Privilege_Avg_Score, Protect_Avg_Score, Neutral_Avg_Score)``. The line
    is treated as infinite (the projection is not clamped to the segment).

    The computation is vectorised with NumPy rather than applied row by row,
    so it always returns a Series -- including for an empty ``data`` frame.

    Args:
        data: DataFrame containing the three ``*_Avg_Score`` columns.
        point_A: First point defining the line (array-like of 3 numbers).
        point_B: Second point defining the line (array-like of 3 numbers).

    Returns:
        A float ``pandas.Series`` of distances indexed like ``data``. If
        ``point_A`` and ``point_B`` coincide, the distance to ``point_A`` is
        used.
    """
    score_columns = ['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']
    points = data[score_columns].to_numpy(dtype=float)

    origin = np.asarray(point_A, dtype=float)
    line_vec = np.asarray(point_B, dtype=float) - origin
    point_vecs = points - origin

    line_len_sq = np.dot(line_vec, line_vec)
    if line_len_sq == 0:
        # Degenerate line: the nearest "line point" is point_A itself.
        nearest = np.zeros_like(point_vecs)
    else:
        # Projection coefficient of every row onto the line direction.
        t = point_vecs @ line_vec / line_len_sq
        nearest = np.outer(t, line_vec)

    distances = np.linalg.norm(nearest - point_vecs, axis=1)
    return pd.Series(distances, index=data.index)
def create_3d_plot(data):
    """Build a 3-D scatter of the three average scores with the ideal line.

    The ideal line runs from (0, 0, 0) to (10, 10, 10), i.e. where the
    Privilege, Protect and Neutral scores are equal. Points are coloured by
    their distance to that line and given a different symbol when that
    distance is below a small tolerance.

    Note: this function adds two columns to ``data`` in place --
    ``Distance_to_Ideal`` and ``Perfect_Match`` ('Yes' / 'No').

    Args:
        data: DataFrame with the three ``*_Avg_Score`` columns plus
            ``Occupation`` and ``Role`` (shown on hover).

    Returns:
        The ``plotly.express.scatter_3d`` figure with a dashed green
        "Ideal Line" trace appended.
    """
    # End points of the ideal line used for the distance computation.
    line_start = np.array([0, 0, 0])
    line_end = np.array([10, 10, 10])

    data['Distance_to_Ideal'] = calculate_distances(data, line_start, line_end)

    # Distances below this threshold count as lying on the ideal line.
    tolerance = 1e-6

    def _match_label(distance):
        if distance < tolerance:
            return 'Yes'
        return 'No'

    data['Perfect_Match'] = data['Distance_to_Ideal'].apply(_match_label)

    # Every listed column is shown in the hover tooltip.
    hover_columns = [
        'Occupation',
        'Role',
        'Privilege_Avg_Score',
        'Protect_Avg_Score',
        'Neutral_Avg_Score',
        'Distance_to_Ideal',
        'Perfect_Match',
    ]

    fig_3d = px.scatter_3d(
        data,
        x='Privilege_Avg_Score',
        y='Protect_Avg_Score',
        z='Neutral_Avg_Score',
        color='Distance_to_Ideal',
        symbol='Perfect_Match',
        hover_data=dict.fromkeys(hover_columns, True),
        title='Occupation and Role Clusters based on Scores with Distance to Ideal Line',
    )

    # Overlay the ideal line where Neutral = Protect = Privilege.
    fig_3d.add_trace(
        go.Scatter3d(
            x=[0, 10],
            y=[0, 10],
            z=[0, 10],
            mode='lines',
            name='Ideal Line',
            line=dict(color='green', dash='dash'),
        )
    )
    return fig_3d