Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import joblib | |
| import numpy as np | |
| import pandas as pd | |
| from sentence_transformers import SentenceTransformer | |
| from collections import OrderedDict | |
class RiasecPredictor:
    """Predict RIASEC scores for a job from its text.

    The job text is embedded with a sentence-transformer model and the
    embedding is fed to a saved regressor; the regressor's outputs are paired
    with the labels R, I, A, S, E, C in that order.
    """

    # Canonical label order; regressor outputs are zipped against it.
    RIASEC_LABELS = ('R', 'I', 'A', 'S', 'E', 'C')

    # Single-letter code -> full RIASEC name.
    RIASEC_NAMES = {
        'R': 'Realistic',
        'I': 'Investigative',
        'A': 'Artistic',
        'S': 'Social',
        'E': 'Enterprising',
        'C': 'Conventional',
    }

    # Every returned score is clamped to this closed interval.
    MIN_SCORE = 1.0
    MAX_SCORE = 7.0

    def __init__(self, regressor_path='riasec_regressor.pkl',
                 embedding_model_path='all-MiniLM-L6-v2'):
        """
        Load saved models for RIASEC prediction

        Args:
            regressor_path (str): Path passed to ``joblib.load``.
            embedding_model_path (str): Name or path passed to
                ``SentenceTransformer``.
        """
        print("Loading models...")
        self.embedding_model = SentenceTransformer(embedding_model_path)
        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code -- only point this at a regressor file you trust.
        self.regressor = joblib.load(regressor_path)
        # Kept as an instance attribute for callers that read it.
        self.riasec_labels = list(self.RIASEC_LABELS)
        print("✅ Models loaded successfully!")

    def predict(self, job_title=None, job_description=None, full_text=None, sort_by_score=True):
        """
        Predict RIASEC scores for a job

        Args:
            job_title (str): Job title
            job_description (str): Job description
            full_text (str): Complete job text (alternative to title + description);
                takes precedence when given
            sort_by_score (bool): If True, return results sorted by score (highest to lowest)

        Returns:
            dict or OrderedDict: RIASEC scores clamped to [1.0, 7.0]. An
            OrderedDict sorted by descending score when ``sort_by_score`` is
            True, otherwise a plain dict in R-I-A-S-E-C order.

        Raises:
            ValueError: If neither ``full_text`` nor both ``job_title`` and
                ``job_description`` are provided, or if the regressor does
                not return exactly one score per RIASEC label.
        """
        # Handle input
        if full_text is not None:
            text = full_text
        elif job_title is not None and job_description is not None:
            text = f"{job_title} {job_description}"
        else:
            raise ValueError("Provide either full_text OR both job_title and job_description")

        # Generate embedding (a batch containing the single text)
        embedding = self.embedding_model.encode([text], convert_to_numpy=True)

        # Make prediction; [0] selects the row for our single input
        raw_scores = np.asarray(self.regressor.predict(embedding)[0])
        expected_shape = (len(self.riasec_labels),)
        if raw_scores.shape != expected_shape:
            # zip() below would otherwise silently drop or mislabel scores.
            raise ValueError(
                f"Regressor returned output of shape {raw_scores.shape}; "
                f"expected {expected_shape}"
            )
        scores = np.clip(raw_scores, self.MIN_SCORE, self.MAX_SCORE).tolist()

        riasec_dict = dict(zip(self.riasec_labels, scores))
        if not sort_by_score:
            return riasec_dict

        # Highest score first; sorted() is stable, so ties keep label order.
        return OrderedDict(
            sorted(riasec_dict.items(), key=lambda item: item[1], reverse=True)
        )

    def predict_with_names(self, job_title=None, job_description=None, full_text=None):
        """
        Predict RIASEC scores with full names in R-I-A-S-E-C order

        Accepts the same text arguments as ``predict`` and raises the same
        ``ValueError`` when they are insufficient.

        Returns:
            OrderedDict: Full RIASEC names with scores, in R-I-A-S-E-C order
        """
        # Get results with codes (not sorted by score)
        scores = self.predict(
            job_title=job_title,
            job_description=job_description,
            full_text=full_text,
            sort_by_score=False,
        )
        # Map codes to full names, walking the canonical R-I-A-S-E-C order
        return OrderedDict(
            (self.RIASEC_NAMES[code], scores[code])
            for code in self.RIASEC_LABELS
            if code in scores
        )
# Initialize predictor once when the script runs: constructing it loads the
# embedding model and the regressor, so the instance is shared module-wide
# instead of being rebuilt per request.
predictor = RiasecPredictor()
def predict_riasec(job_title, job_description):
    """
    Wrapper function for Gradio interface

    Args:
        job_title (str): Job title entered by the user.
        job_description (str): Job description entered by the user.

    Returns:
        tuple: ``(bar_data, message)``. On success ``bar_data`` is a pandas
        DataFrame with "RIASEC" and "Score" columns in R-I-A-S-E-C order and
        ``message`` is markdown/HTML listing the three highest-scoring codes.
        When an input is missing or blank, or when prediction fails,
        ``bar_data`` is ``None`` and ``message`` explains why. Exceptions are
        reported through ``message`` rather than raised.
    """
    try:
        # A missing value (None) is treated like a blank field so the user
        # sees the prompt below instead of an AttributeError message.
        title_given = bool((job_title or "").strip())
        description_given = bool((job_description or "").strip())
        if not (title_given and description_given):
            return None, "Please provide both job title and job description."

        # sort_by_score=False keeps R-I-A-S-E-C order for the bar chart.
        result = predictor.predict(
            job_title=job_title,
            job_description=job_description,
            sort_by_score=False,
        )

        # Bar chart data: always R-I-A-S-E-C order regardless of scores.
        riasec_order = ['R', 'I', 'A', 'S', 'E', 'C']
        ordered_labels = [code for code in riasec_order if code in result]
        bar_data = pd.DataFrame({
            "RIASEC": ordered_labels,
            "Score": [result[code] for code in ordered_labels],
        })

        # Top 3 panel: codes only (no scores), highest score first. The sort
        # is stable, so tied codes keep their order in ``result``.
        top_codes = sorted(result, key=result.get, reverse=True)[:3]
        card_style = (
            "font-size: 1.5em; font-weight: bold; margin: 5px 0; "
            "padding: 10px; background-color: #f0f0f0; color: #000000; "
            "border-radius: 5px; text-align: center; border: 1px solid #cccccc;"
        )
        top_3_result = "### Top 3 RIASEC Types\n\n" + "".join(
            f"<div style='{card_style}'>{code}</div>\n" for code in top_codes
        )
        return bar_data, top_3_result
    except Exception as e:
        # UI boundary: report the failure in the output panel instead of
        # letting the callback raise.
        print(f"Error in predict_riasec: {str(e)}")  # Add debug output
        return None, f"Error: {str(e)}"
# Create Gradio interface
# NOTE(review): the block nesting below was reconstructed from a copy whose
# indentation had been stripped -- confirm the Row/Column layout against the
# deployed app.
with gr.Blocks(title="RIASEC Predictor") as demo:
    # Page heading and one-line description.
    gr.Markdown("# RIASEC Predictor")
    gr.Markdown("Predict RIASEC personality type scores for job descriptions")
    with gr.Row():
        # Inputs: the two text fields plus the submit button.
        with gr.Column():
            job_title = gr.Textbox(label="Job Title", placeholder="e.g., Data Scientist")
            job_description = gr.Textbox(label="Job Description", placeholder="e.g., Analyze large datasets...", lines=4)
            submit_btn = gr.Button("Predict RIASEC Scores", variant="primary")
        # Bar chart fed by the DataFrame that predict_riasec returns; the
        # x/y names match its "RIASEC" and "Score" columns.
        with gr.Column():
            output_chart = gr.BarPlot(
                x="RIASEC",
                y="Score",
                title="RIASEC Scores",
                orientation="h",  # horizontal orientation
                color="RIASEC",
                show_legend=False,
                height=400
            )
        # Markdown panel that receives the second value returned by
        # predict_riasec (the top-3 list, or a prompt/error message).
        with gr.Column():
            top_3_output = gr.Markdown(label="Top 3 RIASEC", elem_classes="top-3-riasec")
    gr.Markdown("Note: Please provide both job title and job description.")
    # Clicking the button runs predict_riasec on the two textboxes and writes
    # its (chart data, markdown) pair to the two outputs.
    submit_btn.click(
        fn=predict_riasec,
        inputs=[job_title, job_description],
        outputs=[output_chart, top_3_output],
        show_progress=True
    )
    # Example inputs (each row is [job title, job description]); they are
    # wired to the same function and outputs as the button.
    gr.Examples(
        examples=[
            ["Data Scientist", "Analyze large datasets and build machine learning models"],
            ["Graphic Designer", "Create visual content and design marketing materials"],
            ["Software Engineer", "Develop and maintain software applications"]
        ],
        inputs=[job_title, job_description],
        outputs=[output_chart, top_3_output],
        fn=predict_riasec,
        cache_examples=False,
    )

# Launch only when executed as a script, not when imported.
# NOTE(review): share=True asks Gradio for a public share link; presumably
# unnecessary when the app is hosted on a platform that already serves it --
# confirm.
if __name__ == "__main__":
    demo.queue().launch(share=True)