Spaces:

dnth
/

riasec-prediction-v0

Sleeping

App Files Files Community

riasec-prediction-v0 / app.py

dnth

Create app.py

fd242ab verified about 2 months ago

raw

history blame contribute delete

7.56 kB

	import gradio as gr
	import joblib
	import numpy as np
	import pandas as pd
	from sentence_transformers import SentenceTransformer
	from collections import OrderedDict

	class RiasecPredictor:
	    """Score a job posting on the six RIASEC dimensions.

	    The job text is turned into a sentence embedding, the embedding is fed to
	    a previously fitted regressor, and the regressor's first output row is
	    paired with the labels R, I, A, S, E, C.
	    """

	    def __init__(self, regressor_path='riasec_regressor.pkl',
	                 embedding_model_path='all-MiniLM-L6-v2'):
	        """
	        Load the embedding model and the saved regressor.

	        Args:
	            regressor_path (str): File handed to ``joblib.load``.
	            embedding_model_path (str): Name or path handed to
	                ``SentenceTransformer``.
	        """
	        print("Loading models...")
	        self.embedding_model = SentenceTransformer(embedding_model_path)
	        self.regressor = joblib.load(regressor_path)
	        # Label order must line up with the regressor's output columns.
	        self.riasec_labels = ['R', 'I', 'A', 'S', 'E', 'C']
	        print("✅ Models loaded successfully!")

	    def predict(self, job_title=None, job_description=None, full_text=None, sort_by_score=True):
	        """
	        Predict RIASEC scores for one job.

	        Args:
	            job_title (str): Job title; used together with job_description.
	            job_description (str): Job description; used together with job_title.
	            full_text (str): Complete job text. When given it takes precedence
	                over job_title / job_description.
	            sort_by_score (bool): If True, order the result from highest to
	                lowest score; otherwise keep the R-I-A-S-E-C label order.

	        Returns:
	            dict or OrderedDict: Label -> score, each score clipped to
	            [1.0, 7.0]. An OrderedDict is returned when sort_by_score is True,
	            a plain dict otherwise.

	        Raises:
	            ValueError: If full_text is None and either job_title or
	                job_description is None.
	        """
	        # Decide which text to embed; full_text wins whenever it is supplied.
	        if full_text is None:
	            if job_title is None or job_description is None:
	                raise ValueError("Provide either full_text OR both job_title and job_description")
	            text = f"{job_title} {job_description}"
	        else:
	            text = full_text

	        # A one-element batch goes in, so only the first prediction row is used.
	        vectors = self.embedding_model.encode([text], convert_to_numpy=True)
	        raw_scores = self.regressor.predict(vectors)[0]
	        bounded = np.clip(raw_scores, 1.0, 7.0)

	        scores = {
	            label: value
	            for label, value in zip(self.riasec_labels, bounded.tolist())
	        }

	        if not sort_by_score:
	            return scores

	        # Stable descending sort: equal scores keep their label order.
	        ranked = list(scores.items())
	        ranked.sort(key=lambda pair: pair[1], reverse=True)
	        return OrderedDict(ranked)

	    def predict_with_names(self, job_title=None, job_description=None, full_text=None):
	        """
	        Predict RIASEC scores keyed by full dimension names.

	        Takes the same text arguments as ``predict`` and raises the same
	        ValueError when they are insufficient.

	        Returns:
	            OrderedDict: Full name -> score, always in R-I-A-S-E-C order.
	        """
	        # Ask for the unsorted result so the label order is kept.
	        scores = self.predict(job_title, job_description, full_text, sort_by_score=False)

	        # (code, full name) pairs listed in R-I-A-S-E-C order.
	        full_names = (
	            ('R', 'Realistic'),
	            ('I', 'Investigative'),
	            ('A', 'Artistic'),
	            ('S', 'Social'),
	            ('E', 'Enterprising'),
	            ('C', 'Conventional'),
	        )

	        return OrderedDict(
	            (name, scores[code]) for code, name in full_names if code in scores
	        )

	# Build one shared predictor when the script is loaded so the models are
	# loaded a single time; predict_riasec below reads this module-level name.
	predictor = RiasecPredictor()

	def predict_riasec(job_title, job_description):
	    """
	    Gradio callback: score a job and build the two UI outputs.

	    Args:
	        job_title (str): Contents of the "Job Title" box.
	        job_description (str): Contents of the "Job Description" box.

	    Returns:
	        tuple: ``(bar_data, markdown)``.
	            On success, ``bar_data`` is a pandas DataFrame with columns
	            "RIASEC" and "Score" in R-I-A-S-E-C order, and ``markdown`` lists
	            the three highest-scoring codes.
	            If either input is missing or blank, or if prediction fails,
	            ``bar_data`` is None and ``markdown`` carries the message.

	    This function never raises: any exception is printed and reported in the
	    second return value so the UI shows it instead of crashing.
	    """
	    try:
	        # Treat None like an empty field so a missing value yields the
	        # validation message rather than an AttributeError on .strip().
	        has_title = bool((job_title or "").strip())
	        has_description = bool((job_description or "").strip())
	        if not (has_title and has_description):
	            return None, "Please provide both job title and job description."

	        # sort_by_score=False keeps R-I-A-S-E-C order for the bar chart.
	        result = predictor.predict(job_title=job_title, job_description=job_description, sort_by_score=False)

	        # Chart rows: fixed R-I-A-S-E-C order regardless of scores, skipping
	        # any code the predictor did not return.
	        riasec_order = ['R', 'I', 'A', 'S', 'E', 'C']
	        present = [code for code in riasec_order if code in result]
	        bar_data = pd.DataFrame({
	            "RIASEC": present,
	            "Score": [result[code] for code in present],
	        })

	        # Top-3 panel: codes only (no scores), highest score first. The sort
	        # is stable, so tied scores keep the predictor's label order.
	        ranked = sorted(result.items(), key=lambda item: item[1], reverse=True)
	        card_style = (
	            "font-size: 1.5em; font-weight: bold; margin: 5px 0; padding: 10px; "
	            "background-color: #f0f0f0; color: #000000; border-radius: 5px; "
	            "text-align: center; border: 1px solid #cccccc;"
	        )
	        cards = "".join(
	            f"<div style='{card_style}'>{code}</div>\n" for code, _ in ranked[:3]
	        )
	        top_3_result = "### Top 3 RIASEC Types\n\n" + cards

	        return bar_data, top_3_result
	    except Exception as e:
	        # UI boundary: log to stdout and surface the error text to the user.
	        print(f"Error in predict_riasec: {str(e)}")
	        return None, f"Error: {str(e)}"

	# Create Gradio interface
	# Declares the page (two text inputs, a submit button, a bar chart and a
	# markdown panel), wires the button to predict_riasec, and registers three
	# example inputs. `demo` is the object launched at the bottom of the file.
	with gr.Blocks(title="RIASEC Predictor") as demo:
	gr.Markdown("# RIASEC Predictor")
	gr.Markdown("Predict RIASEC personality type scores for job descriptions")

	with gr.Row():
	with gr.Column():
	# Inputs: both fields are required by predict_riasec.
	job_title = gr.Textbox(label="Job Title", placeholder="e.g., Data Scientist")
	job_description = gr.Textbox(label="Job Description", placeholder="e.g., Analyze large datasets...", lines=4)
	submit_btn = gr.Button("Predict RIASEC Scores", variant="primary")

	with gr.Column():
	# Chart fed by the DataFrame predict_riasec returns; x/y name its columns.
	# NOTE(review): confirm that `orientation` and `show_legend` are accepted
	# by the BarPlot of the installed gradio version.
	output_chart = gr.BarPlot(
	x="RIASEC",
	y="Score",
	title="RIASEC Scores",
	orientation="h", # horizontal orientation
	color="RIASEC",
	show_legend=False,
	height=400
	)

	with gr.Column():
	# Receives the top-3 markdown, or the validation/error message.
	top_3_output = gr.Markdown(label="Top 3 RIASEC", elem_classes="top-3-riasec")

	gr.Markdown("Note: Please provide both job title and job description.")

	# Clicking the button runs predict_riasec on the two text boxes and sends
	# its (chart data, markdown) tuple to the two output components.
	submit_btn.click(
	fn=predict_riasec,
	inputs=[job_title, job_description],
	outputs=[output_chart, top_3_output],
	show_progress=True
	)

	# Example inputs
	# Each example is a [job title, job description] pair; cache_examples=False
	# is passed so example outputs are not cached.
	gr.Examples(
	examples=[
	["Data Scientist", "Analyze large datasets and build machine learning models"],
	["Graphic Designer", "Create visual content and design marketing materials"],
	["Software Engineer", "Develop and maintain software applications"]
	],
	inputs=[job_title, job_description],
	outputs=[output_chart, top_3_output],
	fn=predict_riasec,
	cache_examples=False,
	)

	# Start the app only when this file is executed as a script: enable the
	# request queue, then launch with share=True.
	# NOTE(review): share=True is presumably unnecessary when the app is hosted
	# on a Space — confirm before relying on it.
	if __name__ == "__main__":
	demo.queue().launch(share=True)