Spaces:

Gieorgie
/

Employee_Attrition_Prediction

Sleeping

App Files Files Community

Gieorgie committed on Jun 5

Commit

0c7270f

•

1 Parent(s): dad36ba

Upload 7 files

Browse files

Files changed (8) hide show

.gitattributes +1 -0
Main_Data.csv +0 -0
app.py +11 -0
eda.py +218 -0
employee.jpg +3 -0
model.pkl +3 -0
prediction.py +71 -0
requirements.txt +8 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+employee.jpg filter=lfs diff=lfs merge=lfs -text

Main_Data.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

app.py ADDED Viewed

	@@ -0,0 +1,11 @@

+import eda
+import prediction
+import streamlit as st
+page = st.sidebar.selectbox('Pilih Halaman: ', ('EDA', 'Prediction'))
+if page == 'EDA':
+    eda.run()
+else:
+    prediction.run()

eda.py ADDED Viewed

	@@ -0,0 +1,218 @@

+import streamlit as st
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+import plotly.express as px
+from PIL import Image
+# Use the wide page layout and start with the sidebar expanded.
+st.set_page_config(
+    page_title='Employee Attrition Prediction',
+    layout='wide',
+    initial_sidebar_state='expanded'
+)
+# Inject a <style> block targeting the main container and the sidebar content.
+# NOTE(review): `background: "5160549.jpg"` is a quoted string rather than a
+# url(...) value, so browsers presumably ignore the rule - confirm, and confirm
+# that the image file is actually shipped with the app.
+st.markdown("""<style>.reportview-container {background: "5160549.jpg"}.sidebar .sidebar-content {background: "5160549.jpg"}</style>""",unsafe_allow_html=True)
+def run():
+    # membuat judul
+    st.title('Employee Attrition Prediction')
+    #membuat sub header
+    st.subheader('Employee Attrition Prediction EDA')
+    # library pillow buat gambar
+    image = Image.open('employee.jpg')
+    st.markdown('---')
+    st.image(image, caption=' "" ')
+    # descripsi
+    st.write('### Halaman ini berisi Eksplorasi Data ')
+    # Membuat Garis lurus
+    st.markdown('---')
+    # Nampilin dataframe
+    st.write('### Employee Attrition Data')
+    df = pd.read_csv('Main_data.csv')
+    st.dataframe(df.head(5))
+    st.markdown('***')
+    #barplot
+    fig = plt.figure(figsize=(8,5))
+    ###########################################
+    st.write('### Attrition Distribution')
+    # Menghitung jumlah setiap value
+    target_counts = df['Attrition'].value_counts()
+    # Membuat label untuk legenda dengan jumlah setiap value
+    labels = [f'Attrition {i} - {count}' for i, count in target_counts.items()]
+    # Membuat pie chart
+    fig = plt.figure(figsize=(10, 5))
+    plt.subplot(1, 2, 2)
+    target_counts.plot(kind='pie', autopct='%1.1f%%',  shadow=True, labels=None, colors =['#0072C6', '#BFBFBF'])
+    plt.title('Employee Attrition')
+    # Menambahkan legenda
+    plt.legend(labels, loc='upper right', bbox_to_anchor=(1.3, 1))
+    st.pyplot(fig)
+    st.markdown('---')
+    ###########################################
+    st.write('### Data Demografi Karyawan')
+    pilihan = st.selectbox('Pilih Kolom : ', ('Gender','Education','MaritalStatus','Department'))
+    # Melakukan pengelompokan langsung pada indeks DataFrame
+    attrition_data = df.groupby([df[pilihan], 'Attrition']).size().unstack(fill_value=0)
+    fig = plt.figure(figsize=(15, 5))
+    colors =['#0072C6', '#BFBFBF']
+    # Plot: Distribusi Attrition berdasarkan kolom yang dipilih
+    ax = plt.gca()
+    # Menyesuaikan jenis plot berdasarkan jumlah indeks attrition_data
+    if len(attrition_data.index) > 3:
+        attrition_data.plot(kind='barh', stacked=True, color=colors, ax=ax)
+        ax.set_xlabel('Jumlah Karyawan')
+        ax.set_ylabel(pilihan)  # Menggunakan nama kolom yang dipilih langsung
+    else:
+        attrition_data.plot(kind='bar', stacked=True, color=colors, ax=ax)
+        ax.set_ylabel('Jumlah Karyawan')
+        ax.set_xlabel(pilihan)  # Menggunakan nama kolom yang dipilih langsung
+        ax.set_xticklabels(attrition_data.index, rotation=0)
+    ax.set_title(f'Distribusi Attrition Berdasarkan {pilihan}')  # Menggunakan nama kolom yang dipilih langsung
+    ax.legend(title='Attrition', labels=['Tidak', 'Ya'])
+    # Menambahkan anotasi pada plot
+    for container in ax.containers:
+        if len(attrition_data.index) > 3:
+            labels = [f'{int(v.get_width())}' for v in container]
+        else:
+            labels = [f'{int(v.get_height())}' for v in container]
+        ax.bar_label(container, labels=labels, label_type='center', padding=2)
+    st.pyplot(fig)
+    st.markdown('---')
+    ####################################################
+    st.write('### Data Survey Karyawan')
+    pilihan = st.selectbox('Pilih Kolom : ', ('EnvironmentSatisfaction','JobSatisfaction', 'WorkLifeBalance'))
+    # Melakukan pengelompokan langsung pada indeks DataFrame
+    attrition_data = df.groupby([df[pilihan], 'Attrition']).size().unstack(fill_value=0)
+    fig = plt.figure(figsize=(15, 5))
+    colors =['#0072C6', '#BFBFBF']
+    # Plot: Distribusi Attrition berdasarkan kolom yang dipilih
+    ax = plt.gca()
+    # Menyesuaikan jenis plot berdasarkan jumlah indeks attrition_data
+    if len(attrition_data.index) > 3:
+        attrition_data.plot(kind='barh', stacked=True, color=colors, ax=ax)
+        ax.set_xlabel('Jumlah Karyawan')
+        ax.set_ylabel(pilihan)  # Menggunakan nama kolom yang dipilih langsung
+    else:
+        attrition_data.plot(kind='bar', stacked=True, color=colors, ax=ax)
+        ax.set_ylabel('Jumlah Karyawan')
+        ax.set_xlabel(pilihan)  # Menggunakan nama kolom yang dipilih langsung
+        ax.set_xticklabels(attrition_data.index, rotation=0)
+    ax.set_title(f'Distribusi Attrition Berdasarkan {pilihan}')  # Menggunakan nama kolom yang dipilih langsung
+    ax.legend(title='Attrition', labels=['Tidak', 'Ya'])
+    # Menambahkan anotasi pada plot
+    for container in ax.containers:
+        if len(attrition_data.index) > 3:
+            labels = [f'{int(v.get_width())}' for v in container]
+        else:
+            labels = [f'{int(v.get_height())}' for v in container]
+        ax.bar_label(container, labels=labels, label_type='center', padding=2)
+    st.pyplot(fig)
+    st.markdown('---')
+    ####################################################
+    st.write('### Data Performa Karyawan')
+    pilihan = st.selectbox('Pilih Kolom : ', ('JobInvolvement', 'PerformanceRating','BusinessTravel','JobLevel', 'JobRole'))
+    # Melakukan pengelompokan langsung pada indeks DataFrame
+    attrition_data = df.groupby([df[pilihan], 'Attrition']).size().unstack(fill_value=0)
+    fig = plt.figure(figsize=(15, 5))
+    colors =['#0072C6', '#BFBFBF']
+    # Plot: Distribusi Attrition berdasarkan kolom yang dipilih
+    ax = plt.gca()
+    # Menyesuaikan jenis plot berdasarkan jumlah indeks attrition_data
+    if len(attrition_data.index) > 3:
+        attrition_data.plot(kind='barh', stacked=True, color=colors, ax=ax)
+        ax.set_xlabel('Jumlah Karyawan')
+        ax.set_ylabel(pilihan)  # Menggunakan nama kolom yang dipilih langsung
+    else:
+        attrition_data.plot(kind='bar', stacked=True, color=colors, ax=ax)
+        ax.set_ylabel('Jumlah Karyawan')
+        ax.set_xlabel(pilihan)  # Menggunakan nama kolom yang dipilih langsung
+        ax.set_xticklabels(attrition_data.index, rotation=0)
+    ax.set_title(f'Distribusi Attrition Berdasarkan {pilihan}')  # Menggunakan nama kolom yang dipilih langsung
+    ax.legend(title='Attrition', labels=['Tidak', 'Ya'])
+    # Menambahkan anotasi pada plot
+    for container in ax.containers:
+        if len(attrition_data.index) > 3:
+            labels = [f'{int(v.get_width())}' for v in container]
+        else:
+            labels = [f'{int(v.get_height())}' for v in container]
+        ax.bar_label(container, labels=labels, label_type='center', padding=2)
+    st.pyplot(fig)
+    st.markdown('---')
+    ####################################################
+    st.write('### Data Numerical')
+    pilihan = st.selectbox('Pilih Kolom : ', ('Age','DistanceFromHome','MonthlyIncome', 'NumCompaniesWorked','PercentSalaryHike','TotalWorkingYears',
+               'YearsAtCompany','YearsSinceLastPromotion','YearsWithCurrManager'))
+    fig = plt.figure(figsize=(15, 5))
+    attrition_no = df[df['Attrition'] == 'No'][pilihan]
+    attrition_yes = df[df['Attrition'] == 'Yes'][pilihan]
+    sns.histplot(attrition_no, color=colors[0], label='No', kde=False, bins=30)
+    sns.histplot(attrition_yes, color=colors[1], label='Yes', kde=False, bins=30)
+    plt.title(f'Histogram Distribusi {pilihan} Berdasarkan Attrition')
+    plt.xlabel(pilihan)
+    plt.ylabel('Jumlah Karyawan')
+    plt.legend(title='Attrition')
+    plt.tight_layout()
+    st.pyplot(fig)
+if __name__ == '__main__':
+    run()

employee.jpg ADDED Viewed

Git LFS Details

SHA256: a1dcc459f8072f5645ac67a2a98d0fc1cd98a1de116ad6aecc9c73c4c620c8eb
Pointer size: 133 Bytes
Size of remote file: 12.7 MB

model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:323c9b55b492141bda434df631e0cc483a0726af23265c9ebd0829a6393cb651
+size 263218

prediction.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import streamlit as st
+import pandas as pd
+import pickle
+# Path to the model file, relative to the process working directory.
+model_path = "model.pkl"
+# Load the model once at import time; run() reads the module-level `model`.
+# NOTE(review): pickle can execute code from the file while loading, so
+# model.pkl must remain a trusted artifact. requirements.txt pins
+# scikit-learn==1.2.2, presumably to match the version that produced the
+# pickle - confirm before changing the pin.
+with open(model_path, 'rb') as f:
+    model = pickle.load(f)
+def run():
+    st.title('Prediksi Pengunduran Diri Karyawan')
+    # Formulir untuk pengisian data
+    with st.form('form_employee_attrition'):
+        # Kolom input sesuai dengan keterangan yang Anda berikan
+        business_travel = st.selectbox('Business Travel', ['Travel_Rarely', 'Travel_Frequently', 'Non-Travel'])
+        department = st.selectbox('Department', ['Sales', 'Research & Development', 'Human Resources'])
+        education_field = st.selectbox('Education Field', ['Life Sciences', 'Other', 'Medical', 'Marketing', 'Technical Degree', 'Human Resources'])
+        job_role = st.selectbox('Job Role', ['Healthcare Representative', 'Research Scientist', 'Sales Executive', 'Human Resources', 'Research Director', 'Laboratory Technician', 'Manufacturing Director', 'Sales Representative', 'Manager'])
+        marital_status = st.selectbox('Marital Status', ['Married', 'Single', 'Divorced'])
+        training_times_last_year = st.selectbox('Training Times Last Year', [0, 1, 2, 3, 4, 5, 6])
+        job_involvement = st.selectbox('Job Involvement', [1, 2, 3, 4], format_func=lambda x: {1: 'Low', 2: 'Medium', 3: 'High', 4: 'Very High'}[x])
+        environment_satisfaction = st.selectbox('Environment Satisfaction', [1, 2, 3, 4], format_func=lambda x: {1: 'Low', 2: 'Medium', 3: 'High', 4: 'Very High'}[x])
+        job_satisfaction = st.selectbox('Job Satisfaction', [1, 2, 3, 4], format_func=lambda x: {1: 'Low', 2: 'Medium', 3: 'High', 4: 'Very High'}[x])
+        work_life_balance = st.selectbox('Work Life Balance', [1, 2, 3, 4], format_func=lambda x: {1: 'Bad', 2: 'Good', 3: 'Better', 4: 'Best'}[x])
+        age = st.slider('Age', min_value=18, max_value=60)
+        percent_salary_hike = st.slider('Percent Salary Hike', min_value=11, max_value=25)
+        total_working_years = st.slider('Total Working Years', min_value=0, max_value=40)
+        years_at_company = st.slider('Years At Company', min_value=0, max_value=40)
+        years_since_last_promotion = st.slider('Years Since Last Promotion', min_value=0, max_value=15)
+        years_with_curr_manager = st.slider('Years With Current Manager', min_value=0, max_value=17)
+        # Tombol untuk melakukan prediksi
+        submitted = st.form_submit_button('Prediksi')
+    # Menyusun data input menjadi DataFrame
+    data = {
+        'BusinessTravel': business_travel,
+        'Department': department,
+        'EducationField': education_field,
+        'JobRole': job_role,
+        'MaritalStatus': marital_status,
+        'TrainingTimesLastYear': training_times_last_year,
+        'JobInvolvement': job_involvement,
+        'EnvironmentSatisfaction': environment_satisfaction,
+        'JobSatisfaction': job_satisfaction,
+        'WorkLifeBalance': work_life_balance,
+        'Age': age,
+        'PercentSalaryHike': percent_salary_hike,
+        'TotalWorkingYears': total_working_years,
+        'YearsAtCompany': years_at_company,
+        'YearsSinceLastPromotion': years_since_last_promotion,
+        'YearsWithCurrManager': years_with_curr_manager
+    }
+    features = pd.DataFrame(data, index=[0])
+    # Menampilkan fitur input pengguna
+    st.write("## Fitur Input Pengguna")
+    st.write(features)
+    # Melakukan prediksi jika tombol prediksi ditekan
+    if submitted:
+        prediction = model.predict(features)
+        st.subheader('Hasil Prediksi')
+        st.write('Pengunduran Diri Karyawan:', 'Ya' if prediction[0] == 1 else 'Tidak')
+if __name__ == '__main__':
+    run()

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+streamlit
+pandas
+seaborn
+matplotlib
+numpy
+scikit-learn==1.2.2
+Pillow
+plotly