massock-sipe / module /
Massock's picture
Upload 20 files
import streamlit as st
import as px
import pandas as pd
from module.load_data import data
from module.dataset import create_data
import as px
from sklearn.tree import DecisionTreeClassifier, export_graphviz
import matplotlib.pyplot as plt'seaborn-dark-palette')
def grades(mark):
    """Convert a numeric mark (scale out of 20) into a letter grade.

    Grade bands (lower bound inclusive, upper bound exclusive):
    U [0, 5), E [5, 9), D [9, 12), C [12, 15), B [15, 18), A 18 and above.

    Args:
        mark: Numeric mark; must be comparable with numbers.

    Returns:
        One of 'U', 'E', 'D', 'C', 'B', 'A'.

    Raises:
        ValueError: If ``mark`` is negative or NaN. Such values previously
            fell through every branch and were silently graded 'A'.
    """
    # `not mark >= 0` (rather than `mark < 0`) also rejects NaN, for which
    # every comparison is False.
    if not mark >= 0:
        raise ValueError(f"mark must be a non-negative number, got {mark!r}")

    # Exclusive upper bound of each band, in ascending order.
    bands = ((5, 'U'), (9, 'E'), (12, 'D'), (15, 'C'), (18, 'B'))
    for upper_bound, letter in bands:
        if mark < upper_bound:
            return letter
    return 'A'
def segmentation(data: pd.DataFrame):
    """Fit a decision tree with gender as target and report feature importances.

    Works on a copy of ``data`` (the caller's frame is not modified): the
    ``form`` column is integer-encoded, a ``DecisionTreeClassifier`` is fitted
    on the six sequence marks plus ``age`` and ``form``, and the fitted tree
    is exported in Graphviz format as a side effect.

    Args:
        data: Table providing the columns ``seq 1`` ... ``seq 6``, ``age``,
            ``form`` and ``gender``.

    Returns:
        A DataFrame with one row per feature and two columns: "Feature"
        (column name) and "Gain" (the tree's ``feature_importances_``).
    """
    df = data.copy()
    # pd.factorize returns (codes, uniques); only the integer codes are needed.
    df['form'] = pd.factorize(df.form)[0]
    cols = [f'seq {i + 1}' for i in range(6)] + ['age', 'form']

    # The estimator must be fitted before feature_importances_ / export exist.
    tree = DecisionTreeClassifier(random_state=31012020).fit(df[cols], df['gender'])

    # NOTE(review): out_file is a bare directory here; the file name appears to
    # have been lost and must match the path read back elsewhere — confirm.
    # NOTE(review): class_names must follow the ascending order of the gender
    # labels; ["Female", "Male"] presumes the labels sort as F < M — confirm.
    export_graphviz(tree, out_file="images/", class_names=["Female", "Male"],
                    feature_names=cols, impurity=False, filled=True, rounded=True)

    return pd.DataFrame({"Feature": cols, "Gain": tree.feature_importances_})
def scatterplot(female=None, male=None, line_separator=None, x_axis=None, y_axis=None):
    """Scatter two student subsets and overlay decision-boundary lines.

    Args:
        female: Table of the female students; indexed by ``x_axis``/``y_axis``.
        male: Table of the male students; indexed by ``x_axis``/``y_axis``.
        line_separator: Pair ``[x_positions, y_positions]``. Each x position
            is drawn as a vertical line, each y position as a horizontal
            line. ``None`` or empty sequences draw no lines.
        x_axis: Column name plotted on the x-axis.
        y_axis: Column name plotted on the y-axis.

    Returns:
        The matplotlib figure holding the plot.
    """
    fig, ax = plt.subplots(figsize=(5, 5))
    ax.scatter(female[x_axis], female[y_axis], marker='v', color='red', label="Girl", lw=1.2)
    ax.scatter(male[x_axis], male[y_axis], marker='o', color='blue', label="Boy", lw=1.2)
    ax.set_xlabel(x_axis)
    ax.set_ylabel(y_axis)
    ax.set_title("Decision line", fontweight="bold")

    if line_separator is None:
        vertical_at, horizontal_at = [], []
    else:
        vertical_at = list(line_separator[0])
        horizontal_at = list(line_separator[1])

    # vlines/hlines accept a whole sequence of positions, so one call per
    # orientation is enough (looping redrew every line once per element).
    if vertical_at:
        # Vertical lines span the y range of BOTH subsets.
        y_top = max(female[y_axis].max(), male[y_axis].max())
        ax.vlines(vertical_at, 0, y_top, color="black", linestyles="-.", lw=1.75)
    if horizontal_at:
        # Horizontal lines extend along x, so their extent comes from the
        # x-axis column (the y-axis column was used here by mistake).
        x_right = max(female[x_axis].max(), male[x_axis].max())
        ax.hlines(horizontal_at, -1, x_right, color="black", linestyles="dashed", lw=1.75)

    ax.legend(loc="best", frameon=True, fancybox=True, shadow=True, title="Gender")
    return fig
# ---- Headline figures shown in the KPI row --------------------------------
df_trim = create_data(data)
total_students = len(data)
average_age = data.age.mean()

# General class average (GCA) per trimester, then the mean of the three.
gca1, gca2, gca3 = (df_trim[f"trimester {k}"].mean() for k in (1, 2, 3))
annual_gca = (gca1 + gca2 + gca3) / 3

# ---- Per-evaluation statistics --------------------------------------------
evaluations = [f'seq {k}' for k in range(1, 7)]

# Mean mark per evaluation, one column per gender.
sgca = pd.DataFrame(columns=['Male', 'Female'], index=evaluations)
for column, code in (('Male', 'M'), ('Female', 'F')):
    sgca[column] = data[data.gender == code][evaluations].mean()

# Change of the mean between consecutive evaluations (first row is NaN).
progression = sgca.diff()

form_counts = data.form.value_counts()
gender_counts = data.gender.value_counts()

# Number of students at/above vs. below 10 for every evaluation.
pass_or_fail = pd.DataFrame({
    seq: data[seq]
    .apply(lambda x: 'Passed' if x >= 10.0 else 'Failed')
    .value_counts()
    .to_dict()
    for seq in evaluations
})

# Number of students per letter grade for every evaluation.
student_grade = pd.DataFrame({
    seq: data[seq].apply(grades).value_counts().to_dict()
    for seq in evaluations
})
# Polar bar chart of the per-evaluation class average: bar length encodes the
# male average, bar colour the female average.
bar_polar = px.bar_polar(
    sgca,
    r='Male',
    theta=sgca.index,
    color='Female',
    title='General Class Average (GCA) for each evaluation',
    barmode="overlay",
    width=1200,
    height=500,
)

# Both donut charts share the same geometry.
_donut_options = dict(hole=0.25, width=500, height=500)

# Share of students per form.
pie_form = px.pie(
    form_counts,
    names=form_counts.index,
    values=form_counts,
    title="Form",
    **_donut_options,
)

# Share of students per gender.
pie_gender = px.pie(
    gender_counts,
    names=gender_counts.index,
    values=gender_counts,
    title="Gender",
    **_donut_options,
)
# Fit the gender-segmentation tree and chart the importance of each feature.
feature_importances = segmentation(data)
bar_feature_importance = px.bar(feature_importances, x="Feature", y="Gain",
                                title="Feature importances bar", width=500)

# Read back the Graphviz source of the exported tree.
# NOTE(review): the path is a bare directory here; the file name appears to
# have been lost and must match the out_file used when exporting — confirm.
with open("images/", encoding="utf-8") as f:
    dot_graph = f.read()

# Per-gender subsets used by the decision-line scatter plot.
female = data[data.gender == "F"]
male = data[data.gender == "M"]
def visualization():
placeholder1 = st.empty()
with placeholder1.container():
kp1, kp2, kp3, kp4, kp5, kp6 = st.columns(6)
label= "**:red[πŸ‘¬ Total students]**",
value = total_students,
delta = ""
label= ":red[**⏳ Average age**]",
value = f"{average_age:.2f}" ,
delta = ""
label=":red[**✍ GCA-trimester 1**]",
delta = 0.0
label=":red[**✍ GCA-trimester 2**]",
value = f"{gca2:.2f}/20",
delta = round(gca2-gca1, 3)
label =":red[**✍ GCA-trimester 3**]",
value = f"{gca3:.2f}/20",
delta = round(gca3-gca2, 3)
label =":red[**πŸŽ“ Annual-GCA**]",
value = f"{annual_gca:.2f}/20",
delta = ""
placeholder2 = st.empty()
with placeholder2.container():
col1, col2 = st.columns(2)
col1.caption('**We represent the gender of students in form 6e-5e (francophone section one of the cameroonian education system).**')
col2.caption("""**We represent the number of students in each form 6e and 5e**.
**NB**: **Cameroon have two sub-education systems one is a francophone and a second is an anglosaxone.**
tab1, tab2, tab3 = st.tabs([':orange[**Performance**]', ':orange[**Distribution**]', ':orange[**Miscelaneous**]'])
with tab1:
tab1.caption("""We represent the general class average of students in each sequence. Colors bar shows the marks
for female gender and bar polar also shows the marks for male gender.""")
tab1.caption("""This chart shows a progression of students during the six evaluations.
We just make a difference between the previous sequence and the next sequence.
The x-axis represent a sequence and the y-axis represent the growth of students.""")
tab1.subheader('Passed or Failed')
col1, col2 = tab1.columns(2)
col1.caption(':red[πŸ’ƒ passed or failed table πŸ‘‰]')
- In left: We got this table to compute the number of students that the mark is >10 (passed) or <10 (failed) for
each sequence.
- In right: We plot this table. The chart explains how a student make some effort to succeed a ICT course.
- In general, we can appreciate the effort of the students in the form 6e and 5e for each evaluation.
tab1.subheader("Student grades")
col3, col4 = tab1.columns(2)
col3.caption(':red[πŸ’ƒ grades table πŸ‘‰]')
The student grade respect this decision:
- U -> [0 - 5[; E -> [5 - 9[
- D -> [9 - 12[; C -> [12 - 15[
- B -> [15 - 18[; A -> [18 - 20[
col4.caption('This bar chart shows the number of student in each grade for each sequence.')
with tab2:
tab2.subheader('evaluation and age distribution')
var1 = tab2.selectbox('Choose items', evaluations+['age'], key=6)
fig1 = px.histogram(data_frame=data, x=var1, width=1200, height=500, opacity=0.75)
col1, col2 = tab2.columns(2)
col1.caption('Boxplot'); col2.caption('Violin')
var2 = col1.selectbox('Choose items', evaluations+['age'], key=7)
var3 = col2.selectbox('Choose items', evaluations+['age'], key=8)
col1.plotly_chart(, y=var2, width=500, height=500))
col2.plotly_chart(px.violin(data_frame=data, x=var3, width=500, height=500))
with tab3:
st.subheader('Relation graph')
container = tab3.empty()
vcol1, vcol2, vcol3 = container.columns(3)
trim1 = px.scatter(data_frame=df_trim, x="trimester 1", y="trimester 2", width=350,
height=350,title="trimester 1 & 2")
trim2 = px.scatter(data_frame=df_trim, x="trimester 2", y="trimester 3", width=350, height=350,title="trimester 3 & 2")
trim3 = px.scatter(data_frame=df_trim, x="trimester 3", y="trimester 1", width=350, height=350,title="trimester 1 & 3")
The three charts shows the monotony of the relation function between the three trimesters.
Each chart prove that the students for form 6e-5e francophone education systems at the Tebap college make considerably
an effort to succeed an ICT's course.
tab3.subheader('Supervised segmentation with tree')
In this section, we are making a supervised segmentation to segment the population of student into subgroups
that have different values for the target gender. To find a subgroups, we are using a tree structured model.
We cannot make a classification here because our dataset is just a size equal to 46. Let's go! πŸ’‚
block = tab3.empty()
hcol1, hcol2 = block.columns(2)
hcol1.caption('**Feature importance table**πŸ‘‰')
The table shows the features that the tree structured model
consider very importance for segmenting students
population in to subgroups.
tab3.subheader('Dot graph')
Let's us interprete this graph.
- node 0: the student that the mark in seq 6 <= 11.375 are female gender; the answer is False. In each node, we have
a condition where the next node give an answers.
Let's plot the line separating the region.
bcol1, bcol2 = tab3.columns(2)
var1 = bcol1.selectbox("Choose x-axis (line[i])", ['seq 1', "seq 2", "seq 4", "seq 6"])
var2 = bcol1.selectbox("Choose y-axis (line[i+1])", reversed(['seq 1', "seq 2", "seq 4", "seq 6"]))
line1 = bcol2.multiselect("Line separator for x-axis", [1.25, 2.75, 3.125, 4.375,5.0, 5.625, 11.375,13.5])
line2 = bcol2.multiselect("Line separator for y-axis", [1.25, 2.75, 3.125, 4.375,5.0, 5.625, 11.375,13.5])
We have eight lines line 1 (node 0) to line 8 (node 12). We start to line 1 and line 2.
gplot = scatterplot(female, male, line_separator=[line1, line2], x_axis=var1, y_axis=var2)