# Spaces: Sleeping
# (The line above looks like page-header residue from the hosting site, not part of the program.)
import pandas as pd | |
import numpy as np | |
from sklearn.ensemble import VotingRegressor | |
from sklearn.base import BaseEstimator, RegressorMixin | |
import gradio as gr | |
import joblib | |
class FastAIWrapper(BaseEstimator, RegressorMixin):
    """Expose a learner object through the scikit-learn regressor interface.

    The wrapped ``learn`` object must provide ``dls.test_dl(X)`` and
    ``get_preds(dl=...)`` (presumably a fastai ``Learner`` -- confirm against
    the training code).
    """

    def __init__(self, learn):
        """Store the learner that ``predict`` delegates to."""
        self.learn = learn

    def fit(self, X, y):
        """Do nothing and return ``self``.

        No training happens here; ``X`` and ``y`` are ignored. Presumably the
        learner is trained elsewhere and this method exists only to satisfy
        the estimator interface -- confirm.
        """
        return self

    def predict(self, X):
        """Return the learner's predictions for ``X`` as a flat NumPy array."""
        # Build a test data loader for X from the learner's own loaders.
        test_loader = self.learn.dls.test_dl(X)
        # get_preds yields a pair; only the first item (predictions) is used.
        predictions, _unused = self.learn.get_preds(dl=test_loader)
        as_array = predictions.numpy()
        return as_array.flatten()
# Load the payroll dataset once at start-up; it supplies both the dropdown
# choices and the default feature values used for predictions.
df = pd.read_csv(
    'City_Employee_Payroll__Current__20240915.csv',
    low_memory=False,
)

# Load the trained model.
# NOTE(review): joblib.load unpickles the file, so it must come from a trusted
# source. Presumably the pickle references FastAIWrapper, which is why that
# class is defined before this line -- confirm.
ensemble = joblib.load('ensemble_model.joblib')
def predict_total_pay(gender, job_title, ethnicity):
    """Predict total pay for one employee described by three attributes.

    A one-row feature frame is built from the arguments. Every other feature
    is filled from the module-level ``df``: the most frequent value for the
    categorical columns, the latest ``PAY_YEAR``, and the column mean for the
    pay columns. Two derived features are then computed and the row is passed
    to the module-level ``ensemble``.

    NOTE: the set of filled-in features may need adjusting to match what the
    model requires.

    Returns:
        The first (and only) element of ``ensemble.predict`` for the row.
    """
    # Features supplied by the caller.
    sample = pd.DataFrame({
        'GENDER': [gender],
        'JOB_TITLE': [job_title],
        'ETHNICITY': [ethnicity],
    })

    # Categorical features the caller does not provide: use the most
    # frequent value in the dataset.
    for column in ('EMPLOYMENT_TYPE', 'JOB_STATUS', 'MOU', 'DEPARTMENT_NO'):
        sample[column] = df[column].mode().iloc[0]

    # Use the most recent pay year present in the dataset.
    sample['PAY_YEAR'] = df['PAY_YEAR'].max()

    # Pay components the caller does not provide: use the dataset average.
    for column in ('REGULAR_PAY', 'OVERTIME_PAY', 'ALL_OTHER_PAY'):
        sample[column] = df[column].mean()

    # Derived features. The +1 in the denominator avoids dividing by zero
    # when there is no non-regular pay.
    non_regular_pay = sample['OVERTIME_PAY'] + sample['ALL_OTHER_PAY']
    sample['PAY_RATIO'] = sample['REGULAR_PAY'] / (non_regular_pay + 1)
    sample['TOTAL_NON_REGULAR_PAY'] = non_regular_pay

    return ensemble.predict(sample)[0]
def gradio_predict(gender, ethnicity, job_title):
    """Gradio callback: format the predicted total pay as a dollar string.

    The parameter order follows the order of the input widgets (gender,
    ethnicity, job title); the values are re-ordered when forwarded to
    ``predict_total_pay``, which takes (gender, job_title, ethnicity).

    Args:
        gender: Selected gender, or ``None`` when nothing is selected.
        ethnicity: Selected ethnicity, or ``None`` when nothing is selected.
        job_title: Selected job title, or ``None`` when nothing is selected.

    Returns:
        The prediction formatted like ``"$12345.67"``, or a short prompt
        asking the user to complete the form when any selection is missing.
    """
    # A dropdown with no selection passes None; forwarding that to the model
    # would fail or give a meaningless prediction, so stop early instead.
    if gender is None or ethnicity is None or job_title is None:
        return "Please select a gender, ethnicity, and job title."

    predicted_pay = predict_total_pay(gender, job_title, ethnicity)
    return f"${predicted_pay:.2f}"
# Build the dropdown choices from the values actually present in the dataset.
def _distinct_values(column):
    """Return the distinct non-null values of ``df[column]`` as a plain list."""
    return df[column].dropna().unique().tolist()


genders = _distinct_values('GENDER')
ethnicities = _distinct_values('ETHNICITY')
# Job titles are sorted; the other two lists keep the order unique() returns.
job_titles = sorted(_distinct_values('JOB_TITLE'))
# Assemble the web UI: three dropdowns feeding gradio_predict, one text output.
# The dropdown order must match gradio_predict's parameter order
# (gender, ethnicity, job title).
iface = gr.Interface(
    fn=gradio_predict,
    inputs=[
        gr.Dropdown(choices=options, label=caption)
        for options, caption in (
            (genders, "Gender"),
            (ethnicities, "Ethnicity"),
            (job_titles, "Job Title"),
        )
    ],
    outputs=gr.Textbox(label="Predicted Total Pay"),
    title="LA City Employee Pay Predictor",
    description="Predict the total pay for LA City employees based on gender, ethnicity, and job title.",
)

# Start serving the interface.
iface.launch()