Spaces:

huntrezz
/

LACityEmployeePayPredictor

Sleeping

App Files Files Community

LACityEmployeePayPredictor / app.py

huntrezz

Update app.py

5bbeebd verified 6 months ago

raw

history blame

2.6 kB

	import pandas as pd
	import numpy as np
	from sklearn.ensemble import VotingRegressor
	from sklearn.base import BaseEstimator, RegressorMixin
	import gradio as gr
	import joblib

	class FastAIWrapper(BaseEstimator, RegressorMixin):
	    """Adapter that exposes an already-built learner as a scikit-learn regressor.

	    The wrapped ``learn`` object only needs to provide ``dls.test_dl(X)`` and
	    ``get_preds(dl=...)``. NOTE(review): the class name suggests a fastai
	    ``Learner``, but nothing in this file confirms that -- verify.
	    """

	    def __init__(self, learn):
	        # Kept under the same name as the constructor argument, which is what
	        # BaseEstimator's get_params()/clone() machinery expects.
	        self.learn = learn

	    def fit(self, X, y):
	        """Perform no training; return ``self`` unchanged."""
	        return self

	    def predict(self, X):
	        """Run the wrapped learner on ``X`` and return flattened predictions."""
	        inference_dl = self.learn.dls.test_dl(X)
	        # get_preds hands back a pair; only its first element is used here.
	        raw_preds, _ignored = self.learn.get_preds(dl=inference_dl)
	        return raw_preds.numpy().flatten()

	# Artifacts loaded once at start-up: the payroll CSV (used for dropdown
	# choices and filler feature values) and the serialized ensemble model.
	PAYROLL_CSV_PATH = 'City_Employee_Payroll__Current__20240915.csv'
	ENSEMBLE_MODEL_PATH = 'ensemble_model.joblib'

	df = pd.read_csv(PAYROLL_CSV_PATH, low_memory=False)
	# NOTE(review): joblib.load unpickles the file, so only trusted model files
	# should be placed at this path. Presumably the saved ensemble contains
	# FastAIWrapper instances, in which case that class must already be defined
	# when this line runs -- confirm.
	ensemble = joblib.load(ENSEMBLE_MODEL_PATH)

	# Cache for the dataset-wide filler values used by predict_total_pay.
	# Stays None until the first prediction is requested.
	_BASELINE_FEATURES = None


	def _baseline_features():
	    """Return the dataset-wide filler feature values, computing them only once.

	    These values depend solely on the payroll DataFrame ``df``, so scanning
	    the whole dataset again on every request is unnecessary. The dict keeps
	    the column order that predict_total_pay appends to its input frame.
	    """
	    global _BASELINE_FEATURES
	    if _BASELINE_FEATURES is None:
	        # Categorical fillers: the most frequent value of each column.
	        baseline = {
	            column: df[column].mode().iloc[0]
	            for column in ('EMPLOYMENT_TYPE', 'JOB_STATUS', 'MOU', 'DEPARTMENT_NO')
	        }
	        # Latest pay year present in the data.
	        baseline['PAY_YEAR'] = df['PAY_YEAR'].max()
	        # Numeric pay components: the dataset mean of each column.
	        for column in ('REGULAR_PAY', 'OVERTIME_PAY', 'ALL_OTHER_PAY'):
	            baseline[column] = df[column].mean()
	        # Publish only the fully built dict so a concurrent caller never
	        # observes a partially filled cache.
	        _BASELINE_FEATURES = baseline
	    return _BASELINE_FEATURES


	def predict_total_pay(gender, job_title, ethnicity):
	    """Predict total pay for one hypothetical employee.

	    Args:
	        gender: Value for the ``GENDER`` feature.
	        job_title: Value for the ``JOB_TITLE`` feature.
	        ethnicity: Value for the ``ETHNICITY`` feature.

	    Returns:
	        The first element of ``ensemble.predict`` for a single-row frame.

	    Every feature the caller does not supply is filled with a dataset-wide
	    default (mode, max or mean) taken from the payroll data.
	    NOTE(review): the feature set and column order are assumed to match what
	    the saved ensemble was trained on -- confirm against the training code.
	    """
	    sample = pd.DataFrame({
	        'GENDER': [gender],
	        'JOB_TITLE': [job_title],
	        'ETHNICITY': [ethnicity],
	    })

	    # Append the cached filler features in their original order.
	    for column, value in _baseline_features().items():
	        sample[column] = value

	    # Derived features; the +1 keeps the ratio's denominator non-zero when
	    # there is no non-regular pay.
	    non_regular_pay = sample['OVERTIME_PAY'] + sample['ALL_OTHER_PAY']
	    sample['PAY_RATIO'] = sample['REGULAR_PAY'] / (non_regular_pay + 1)
	    sample['TOTAL_NON_REGULAR_PAY'] = non_regular_pay

	    return ensemble.predict(sample)[0]

	def gradio_predict(gender, ethnicity, job_title):
	    """Gradio callback: return the predicted pay as a dollar string.

	    The UI supplies (gender, ethnicity, job_title), while predict_total_pay
	    takes (gender, job_title, ethnicity); keyword arguments make the
	    reordering explicit.
	    """
	    estimate = predict_total_pay(
	        gender=gender,
	        job_title=job_title,
	        ethnicity=ethnicity,
	    )
	    return f"${estimate:.2f}"

	# Dropdown choices are the distinct non-null values found in the payroll data.
	def _distinct_values(column):
	    """Return the non-null distinct values of ``df[column]`` as a list."""
	    return df[column].dropna().unique().tolist()


	genders = _distinct_values('GENDER')
	ethnicities = _distinct_values('ETHNICITY')
	# Only job titles are sorted; the other two keep first-appearance order.
	job_titles = sorted(_distinct_values('JOB_TITLE'))

	# Build the Gradio UI: three dropdowns in, one text box out.
	# The input order must match gradio_predict's (gender, ethnicity, job_title).
	gender_input = gr.Dropdown(choices=genders, label="Gender")
	ethnicity_input = gr.Dropdown(choices=ethnicities, label="Ethnicity")
	job_title_input = gr.Dropdown(choices=job_titles, label="Job Title")
	pay_output = gr.Textbox(label="Predicted Total Pay")

	iface = gr.Interface(
	    fn=gradio_predict,
	    inputs=[gender_input, ethnicity_input, job_title_input],
	    outputs=pay_output,
	    title="LA City Employee Pay Predictor",
	    description="Predict the total pay for LA City employees based on gender, ethnicity, and job title.",
	)

	# Start the web app.
	iface.launch()