import gradio as gr
import pandas as pd
import joblib
import pickle

# Fitted estimator used by the app; loaded once at import time so every
# prediction request reuses the same in-memory object.
# NOTE: joblib and pickle both execute code while deserializing, so these
# two files must only ever come from a trusted source.
model = joblib.load("models/rfc_manual_selection.joblib")

# Mapping from categorical input name to the list of values offered in the
# corresponding dropdown of the UI.
with open("models/categorical_features_manual_selection.pkl", "rb") as features_file:
    categorical_features = pickle.load(features_file)


def feat_eng(df):
    """Return a copy of ``df`` extended with derived per-semester features.

    For each of the two semesters (``first_sem`` and ``second_sem``) the input
    must provide the columns ``<sem>_credited``, ``<sem>_enrolled``,
    ``<sem>_evaluations``, ``<sem>_approved``, ``<sem>_grade`` and
    ``<sem>_without_evaluations``.  The input frame itself is not modified.

    New columns are appended feature by feature, first semester before second:

    * ``<sem>_approved_over_evaluations`` - approved units divided by the
      number of evaluations (a ratio, not a percentage of approved exams).
    * ``<sem>_has_evals`` - True when the number of evaluations is non-zero.
    * ``<sem>_has_units_without_evals`` - True when ``<sem>_without_evaluations``
      equals 0.
    * ``<sem>_grade_over_enrolled`` - average grade divided by enrolled units.
    * ``<sem>_has_enrolled`` - True when the number of enrolled units is non-zero.
    * ``<sem>_disapproved`` - enrolled units minus approved units.
    * ``<sem>_perc_credited`` - credited units divided by enrolled units.

    A zero denominator leaves NaN (or an infinite value) in the ratio columns.
    Nothing is imputed here; the ``has_*`` flags only record where that can
    happen, so any filling has to be done downstream.
    """
    enriched = df.copy()

    # (column suffix, builder taking the semester prefix) in output column order.
    derived_features = (
        (
            "approved_over_evaluations",
            lambda sem: enriched[f"{sem}_approved"] / enriched[f"{sem}_evaluations"],
        ),
        ("has_evals", lambda sem: enriched[f"{sem}_evaluations"] != 0),
        # NOTE(review): the flag is True when there are NO units without
        # evaluations, which reads inverted relative to its name. Left as is,
        # presumably matching what the saved model was trained on - confirm
        # before changing.
        (
            "has_units_without_evals",
            lambda sem: enriched[f"{sem}_without_evaluations"] == 0,
        ),
        (
            "grade_over_enrolled",
            lambda sem: enriched[f"{sem}_grade"] / enriched[f"{sem}_enrolled"],
        ),
        ("has_enrolled", lambda sem: enriched[f"{sem}_enrolled"] != 0),
        (
            "disapproved",
            lambda sem: enriched[f"{sem}_enrolled"] - enriched[f"{sem}_approved"],
        ),
        (
            "perc_credited",
            lambda sem: enriched[f"{sem}_credited"] / enriched[f"{sem}_enrolled"],
        ),
    )

    # Outer loop over features, inner loop over semesters, so the resulting
    # columns alternate first/second semester for each feature.
    for suffix, build in derived_features:
        for sem in ("first_sem", "second_sem"):
            enriched[f"{sem}_{suffix}"] = build(sem)

    return enriched


def predict(*args):
    """Turn one set of form values into outcome probabilities.

    ``args`` must hold exactly one value per entry of ``feature_names`` below,
    in that order.  The values are placed in a single-row DataFrame, extended
    by ``feat_eng`` and passed to ``model.predict_proba``.

    Returns a dict with the keys "Not Dropout" and "Dropout" holding the first
    and second probability of that row.
    """
    feature_names = [
        "course",
        "day_evening",
        "previous_education",
        "displaced",
        "tuition_fees_up_to_date",
        "gender",
        "scholarship_holder",
        "age_at_enrollment",
        "first_sem_credited",
        "first_sem_enrolled",
        "first_sem_evaluations",
        "first_sem_approved",
        "first_sem_grade",
        "first_sem_without_evaluations",
        "second_sem_credited",
        "second_sem_enrolled",
        "second_sem_evaluations",
        "second_sem_approved",
        "second_sem_grade",
        "second_sem_without_evaluations",
    ]
    # One student per request, hence a frame with a single row.
    single_row = pd.DataFrame([args], columns=feature_names)
    model_input = feat_eng(single_row)

    row_probabilities = model.predict_proba(model_input)[0]
    # NOTE(review): assumes the model's class order is (not dropout, dropout);
    # verify against the fitted model's classes.
    return {
        "Not Dropout": row_probabilities[0],
        "Dropout": row_probabilities[1],
    }


def _category_dropdown(label, key):
    """Return a dropdown over the stored categories for ``key``, first one preselected."""
    options = categorical_features[key]
    return gr.Dropdown(label=label, choices=options, value=options[0])


def _semester_numbers(ordinal, defaults):
    """Return the six curricular-unit number inputs of one semester.

    ``ordinal`` is the semester text used in each label ("1st" or "2nd") and
    ``defaults`` holds the initial values, in the same order as ``kinds``.
    The inputs are created (and therefore laid out) in that order.
    """
    kinds = (
        "credited",
        "enrolled",
        "evaluations",
        "approved",
        "grade",
        "without evaluations",
    )
    return [
        gr.Number(label=f"Curricular units {ordinal} sem ({kind})", value=default)
        for kind, default in zip(kinds, defaults)
    ]


# Page layout: a title, one row with three columns of inputs (student profile,
# first semester, second semester) and one row with the button and the result.
with gr.Blocks() as students_app:
    gr.Markdown(
        """
    # Analysis of students dropout and academic success
    
    Predicting probability of students dropout
    """
    )
    with gr.Row():
        with gr.Column():
            course = _category_dropdown("Course", "course")
            day_evening = _category_dropdown("Day/Evening", "day_evening")
            previous_education = _category_dropdown("Previous education", "previous_education")
            displaced = _category_dropdown("Displaced", "displaced")
            tuition_fees_up_to_date = _category_dropdown(
                "Tuition fees up to date?", "tuition_fees_up_to_date"
            )
            gender = _category_dropdown("Gender", "gender")
            scholarship_holder = _category_dropdown("Scholarship holder?", "scholarship_holder")
            age_at_enrollment = gr.Number(label="Age at time of enrollment", value=20)
        with gr.Column():
            (
                first_sem_credited,
                first_sem_enrolled,
                first_sem_evaluations,
                first_sem_approved,
                first_sem_grade,
                first_sem_without_evaluations,
            ) = _semester_numbers("1st", (5, 4, 10, 2, 4, 1))
        with gr.Column():
            (
                second_sem_credited,
                second_sem_enrolled,
                second_sem_evaluations,
                second_sem_approved,
                second_sem_grade,
                second_sem_without_evaluations,
            ) = _semester_numbers("2nd", (4, 4, 8, 3, 3, 2))
    with gr.Row():
        with gr.Column():
            predict_btn = gr.Button(value="Predict")
        with gr.Column():
            label = gr.Label(label="Outcome probabilities", show_label=False)
            # The order of this list must match the positional order that
            # predict() assigns to its column names.
            model_inputs = [
                course,
                day_evening,
                previous_education,
                displaced,
                tuition_fees_up_to_date,
                gender,
                scholarship_holder,
                age_at_enrollment,
                first_sem_credited,
                first_sem_enrolled,
                first_sem_evaluations,
                first_sem_approved,
                first_sem_grade,
                first_sem_without_evaluations,
                second_sem_credited,
                second_sem_enrolled,
                second_sem_evaluations,
                second_sem_approved,
                second_sem_grade,
                second_sem_without_evaluations,
            ]
            predict_btn.click(predict, inputs=model_inputs, outputs=[label])

# Start the web server as soon as the module is executed.
students_app.launch()