File size: 9,833 Bytes
b7c5cb8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
import streamlit as st 
import plotly.express as px
import pandas as pd
from module.load_data import data   
from module.dataset import create_data
import plotly.express as px
from sklearn.tree import DecisionTreeClassifier, export_graphviz
import matplotlib.pyplot as  plt

#plt.style.use('seaborn-dark-palette')

def grades(mark):
	"""Translate a mark into its letter grade.

	Bands are half-open intervals: U = [0, 5), E = [5, 9), D = [9, 12),
	C = [12, 15), B = [15, 18). Any mark that falls in none of these bands
	(18 and above, but also anything below 0 or not comparable to a number,
	such as NaN) is reported as 'A'.
	"""
	bands = (
		(0, 5, 'U'),
		(5, 9, 'E'),
		(9, 12, 'D'),
		(12, 15, 'C'),
		(15, 18, 'B'),
	)
	for lower, upper, letter in bands:
		if lower <= mark < upper:
			return letter
	# Fall-through bucket: no band matched.
	return 'A'

def segmentation(data:pd.DataFrame):
	"""Fit a decision tree that separates students by gender and report feature gains.

	The tree is trained on the six evaluation marks ('seq 1' .. 'seq 6'), 'age'
	and the integer-encoded 'form' column, with 'gender' as the target. The
	fitted tree is also exported in Graphviz format to "images/tree.dot".

	Parameters
	----------
	data : pd.DataFrame
		Must provide the columns 'seq 1' .. 'seq 6', 'age', 'form' and 'gender'.
		The frame is copied, so the caller's data is left untouched.

	Returns
	-------
	pd.DataFrame
		Two columns: "Feature" (the input column name) and "Gain" (the tree's
		feature importance for that column).
	"""
	import os  # local import: only needed to guarantee the export directory exists

	dot_path = "images/tree.dot"

	df = data.copy()
	# Replace the categorical form label by integer codes so the tree can split on it.
	df['form'] = pd.factorize(df.form)[0]

	cols = [f'seq {i+1}' for i in range(6)] + ['age','form']
	# Fixed seed keeps the fitted tree (and the exported graph) reproducible.
	tree = DecisionTreeClassifier(random_state=31012020)
	tree.fit(df[cols], df['gender'])

	# export_graphviz does not create missing directories; without this a fresh
	# checkout with no images/ folder fails with FileNotFoundError.
	os.makedirs(os.path.dirname(dot_path), exist_ok=True)
	# NOTE(review): class_names are matched to the sorted target labels, so this
	# assumes gender is coded 'F' / 'M' — confirm against the dataset.
	export_graphviz(tree, out_file=dot_path, class_names=["Female", "Male"],
		feature_names=cols, impurity=False, filled=True,rounded=True)

	return pd.DataFrame({"Feature":cols, "Gain":tree.feature_importances_})

def scatterplot(female=None, male=None, line_separator=None, x_axis=None, y_axis=None):
	"""Scatter girls against boys on two chosen columns and overlay separator lines.

	Parameters
	----------
	female, male : pd.DataFrame
		Rows for each gender; both must contain the columns `x_axis` and `y_axis`.
	line_separator : sequence of two sequences, optional
		`line_separator[0]` holds the x positions of vertical lines and
		`line_separator[1]` the y positions of horizontal lines. `None` draws
		no separator at all.
	x_axis, y_axis : str
		Column names plotted on the horizontal and vertical axis.

	Returns
	-------
	matplotlib.figure.Figure
		The 5x5 inch figure that holds the plot.
	"""
	fig = plt.figure(figsize=(5,5))
	plt.scatter(female[x_axis], female[y_axis], marker='v', color='red', label="Girl", lw=1.2)
	plt.scatter(male[x_axis], male[y_axis], marker='o', color='blue', label="Boy",lw=1.2)
	plt.xlabel(x_axis)
	plt.ylabel(y_axis)
	plt.title("Decision line", fontweight="bold")

	if line_separator is None:
		x_cuts, y_cuts = [], []
	else:
		x_cuts, y_cuts = list(line_separator[0]), list(line_separator[1])

	# Size the separators on the data of BOTH genders: vertical lines span the
	# y range, horizontal lines span the x range.
	x_top = pd.concat([female[x_axis], male[x_axis]]).max()
	y_top = pd.concat([female[y_axis], male[y_axis]]).max()

	# vlines/hlines accept the whole list of positions, so one call per
	# direction is enough (no need to redraw the full list once per element).
	if x_cuts:
		plt.vlines(x_cuts, 0, y_top, color="black",
		 linestyles="-.", lw=1.75)
	if y_cuts:
		# Starts at -1, as before, so the line reaches slightly left of the origin.
		plt.hlines(y_cuts, -1, x_top, color="black",
		 linestyles="dashed", lw=1.75)

	plt.legend(loc="best", frameon=True, fancybox=True, shadow=True, title="Gender")
	return fig


# ---------------------------------------------------------------------------
# Aggregates and figures computed once, at import time, from the `data` frame
# imported above. visualization() only reads these names when it draws the page.
# ---------------------------------------------------------------------------

# create_data is a project helper; the frame it returns is indexed below by the
# columns "trimester 1" .. "trimester 3" (presumably per-trimester averages —
# confirm in module.dataset).
df_trim = create_data(data)
total_students = len(data)
average_age = data.age.mean()
# General class average (GCA) of each trimester, and their plain mean for the year.
gca1 = df_trim["trimester 1"].mean()
gca2 = df_trim["trimester 2"].mean()
gca3 = df_trim["trimester 3"].mean()
annual_gca = (gca1+gca2+gca3)/3

# Column names of the six evaluations ("sequences").
evaluations = ['seq 1', 'seq 2','seq 3','seq 4','seq 5','seq 6']

# Mean mark per evaluation (rows) for each gender (columns).
sgca = pd.DataFrame(columns=['Male','Female'], index=evaluations)

sgca['Male'] = data[data.gender == 'M'][evaluations].mean()
sgca['Female'] = data[data.gender == 'F'][evaluations].mean()

# Head counts per form and per gender, used by the two pie charts.
form_counts = data.form.value_counts()
gender_counts = data.gender.value_counts()

# Change of the gender averages from one evaluation to the next; the first
# evaluation has no predecessor, so its row holds no value.
progression = sgca.diff()

# For each evaluation, how many students have a mark >= 10.0 ('Passed') and how many do not ('Failed').
pass_or_fail =pd.DataFrame({seq:data[seq].apply(lambda x: 'Passed' if x >= 10.0 else 'Failed').value_counts().to_dict() for seq in evaluations})   

# For each evaluation, how many students fall in each letter grade returned by grades().
# A letter nobody obtained in a given evaluation is missing from that column's counts.
student_grade =  pd.DataFrame({seq:data[seq].apply(grades).value_counts().to_dict() for seq in evaluations})

##print(student_grade)

# Polar bar chart: one bar per evaluation, bar length = male average,
# bar colour = female average.
bar_polar = px.bar_polar(data_frame=sgca, r='Male', 
	theta=sgca.index, color='Female', 
	title='General Class Average (GCA) for each evaluation',
	barmode="overlay",width=1200, height=500)
# Donut charts (hole=0.25) of the form and gender head counts.
pie_form = px.pie(data_frame=form_counts, names=form_counts.index, values=form_counts,
 title="Form", hole=0.25, width=500, height=500)
pie_gender = px.pie(data_frame=gender_counts, names=gender_counts.index, values=gender_counts,
 title="Gender", hole=0.25, width=500, height=500)


# Fits the gender decision tree; as a side effect segmentation() writes
# "images/tree.dot", which is read back a few lines below.
feature_importances = segmentation(data)

bar_feature_importance = px.bar(data_frame=feature_importances, x="Feature", y="Gain",
  title="Feature importances bar", width=500)

# Graphviz source of the fitted tree. Must stay after the segmentation() call
# above, which is what produces the file.
with open("images/tree.dot") as f:
	dot_graph = f.read()

# Per-gender subsets handed to scatterplot() by visualization().
female = data[data.gender == "F"]
male   = data[data.gender == "M"]


def _render_kpis():
	"""Draw the row of six headline metrics: head count, average age, the three trimester GCAs and the annual GCA."""
	placeholder1 = st.empty()

	with placeholder1.container():
		kp1, kp2, kp3, kp4, kp5, kp6 = st.columns(6)

		kp1.metric(
			label= "**:red[👬 Total students]**",
			value = total_students,
			delta = ""
			)

		kp2.metric(
			label= ":red[**⏳ Average age**]",
			value = f"{average_age:.2f}" ,
			delta = ""
			)

		# Trimester 1 is the baseline, so its delta is fixed at 0.0; the next
		# two trimesters show their change against the previous trimester.
		kp3.metric(
			label=":red[**✍ GCA-trimester 1**]",
			value=f"{gca1:.2f}/20",
			delta = 0.0
			)
		kp4.metric(
			label=":red[**✍ GCA-trimester 2**]",
			value = f"{gca2:.2f}/20",
			delta = round(gca2-gca1, 3) 
			)
		kp5.metric(
			label =":red[**✍ GCA-trimester 3**]",
			value = f"{gca3:.2f}/20",
			delta = round(gca3-gca2, 3) 
			)
		kp6.metric(
			label =":red[**🎓 Annual-GCA**]",
			value = f"{annual_gca:.2f}/20",
			delta = "" 
			)


def _render_population_pies():
	"""Draw the gender and form pie charts side by side, each with its caption."""
	placeholder2 = st.empty()
	with placeholder2.container():
		col1, col2 = st.columns(2)
		col1.plotly_chart(pie_gender)
		col1.caption('**We represent the gender of students in form 6e-5e (francophone section one of the cameroonian education system).**')
		col2.plotly_chart(pie_form)
		col2.caption("""**We represent the number of students in each form 6e and 5e**. 
		**NB**: **Cameroon have two sub-education systems one is a francophone and a second is an anglosaxone.**   
			""")


def _render_performance_tab(tab1):
	"""Fill the Performance tab: polar GCA chart, progression line chart, pass/fail and grade tables with their bar charts."""
	with tab1:
		tab1.plotly_chart(bar_polar)
		tab1.caption("""We represent the general class average of students in each sequence. Colors bar shows the marks 
			for female gender and bar polar also shows the marks for male gender.""")
		tab1.subheader('Progression')
		tab1.line_chart(progression)
		tab1.caption("""This chart shows a progression of students during the six evaluations. 
			We just make a difference between the previous sequence and the next sequence. 
			The x-axis represent a sequence and the y-axis represent the growth of students.""")

		tab1.subheader('Passed or Failed')
		col1, col2 = tab1.columns(2)
		col1.caption(':red[💃 passed or failed table 👉]')
		col1.dataframe(pass_or_fail)
		col1.caption("""
		- In left: We got this table to compute the number of students that the mark is  >10 (passed) or <10 (failed) for 
		each sequence.
		- In right: We plot this table. The chart explains how a student make some effort to succeed a ICT course.   
		- In general, we can appreciate the effort of the students in the form 6e and 5e for each evaluation.   
			""")
		# Transposed so that each evaluation becomes one group of bars.
		col2.bar_chart(pass_or_fail.T)

		tab1.subheader("Student grades")
		col3, col4 = tab1.columns(2)
		col3.caption(':red[💃 grades table 👉]')
		# A grade nobody obtained in an evaluation has no count; show it as 0 in the table.
		col3.dataframe(student_grade.fillna(0))
		col3.caption("""   
			The student grade respect this decision:
			- U -> [0 - 5[;   E -> [5 - 9[
			- D -> [9 - 12[;  C -> [12 - 15[
			- B -> [15 - 18[; A -> [18 - 20[
			""")
		col4.bar_chart(student_grade.T)
		col4.caption('This bar chart shows the number of student in each grade for each sequence.')


def _render_distribution_tab(tab2):
	"""Fill the Distribution tab: histogram, box plot and violin plot of a user-selected evaluation or of age."""
	with tab2:
		tab2.subheader('evaluation and age distribution')
		tab2.caption('Histogram')
		# The three selectboxes share the same label, so each needs its own key.
		var1 = tab2.selectbox('Choose items', evaluations+['age'], key=6)
		fig1 = px.histogram(data_frame=data, x=var1,  width=1200, height=500, opacity=0.75)
		tab2.plotly_chart(fig1)
		col1, col2 = tab2.columns(2)
		col1.caption('Boxplot')
		col2.caption('Violin')
		var2 = col1.selectbox('Choose items', evaluations+['age'], key=7)
		var3 = col2.selectbox('Choose items', evaluations+['age'], key=8)
		col1.plotly_chart(px.box(data_frame=data, y=var2,  width=500, height=500))
		col2.plotly_chart(px.violin(data_frame=data, x=var3,  width=500, height=500))


def _render_misc_tab(tab3):
	"""Fill the Miscellaneous tab: trimester scatter plots, decision-tree feature importances, the tree graph and the interactive decision-line plot."""
	with tab3:
		tab3.subheader('Relation graph')
		container = tab3.empty()
		vcol1, vcol2, vcol3 = container.columns(3)

		trim1 = px.scatter(data_frame=df_trim, x="trimester 1", y="trimester 2", width=350, 
			height=350,title="trimester 1 & 2")
		vcol1.plotly_chart(trim1)

		trim2 = px.scatter(data_frame=df_trim, x="trimester 2", y="trimester 3", width=350, height=350,title="trimester 3 & 2")
		vcol2.plotly_chart(trim2)

		trim3 = px.scatter(data_frame=df_trim, x="trimester 3", y="trimester 1", width=350, height=350,title="trimester 1 & 3")
		vcol3.plotly_chart(trim3)

		tab3.caption("""
			The three charts shows the monotony of the relation function between the three trimesters. 
			Each chart prove that the students for form 6e-5e francophone education systems at the Tebap college make considerably
			an effort to succeed an ICT's course.
			""")
		tab3.subheader('Supervised segmentation with tree')
		tab3.caption(""" 
			In this section, we are making a supervised segmentation to segment the population of student into subgroups
			that have different values for the target gender. To find a subgroups, we are using a tree structured model.
			We cannot make a classification here because our dataset is just a size equal to 46. Let's go! 💂
			""")
		block = tab3.empty()
		hcol1, hcol2 = block.columns(2)
		hcol1.caption('**Feature importance table**👉')
		hcol1.dataframe(feature_importances)
		hcol1.caption("""  
			The table shows the features that the tree structured model 
			consider very importance for segmenting students
			population in to subgroups. 
			""")

		hcol2.plotly_chart(bar_feature_importance)
		tab3.subheader('Dot graph')
		tab3.graphviz_chart(dot_graph)
		tab3.caption("""
			Let's us interprete this graph.

			- node 0: the student that the mark in seq 6 <= 11.375 are female gender; the answer is False. In each node, we have 
			a condition where the next node give an answers.

			Let's plot the line separating the region.  
			""")

		bcol1, bcol2 = tab3.columns(2)

		# Evaluations and thresholds offered for the decision-line plot.
		# NOTE(review): these look like the split features / split values of the
		# exported tree, hard-coded by hand — confirm if the data changes.
		axis_choices = ['seq 1', "seq 2", "seq 4", "seq 6"]
		thresholds = [1.25, 2.75, 3.125, 4.375,5.0, 5.625, 11.375,13.5]

		x_choice = bcol1.selectbox("Choose x-axis (line[i])", axis_choices)
		# The y selector lists the same columns in reverse order, so the two
		# defaults differ. A real list is passed instead of a one-shot iterator.
		y_choice = bcol1.selectbox("Choose y-axis (line[i+1])", axis_choices[::-1])

		line1 = bcol2.multiselect("Line separator for x-axis", thresholds)
		line2 = bcol2.multiselect("Line separator for y-axis", thresholds)
		tab3.caption("""  
			We have eight lines line 1 (node 0) to line 8 (node 12). We start to line 1 and line 2.  
			""")
		gplot = scatterplot(female, male, line_separator=[line1, line2], x_axis=x_choice, y_axis=y_choice)
		tab3.pyplot(gplot)


def visualization():
	"""Render the whole dashboard page.

	Layout, top to bottom: the KPI row, the gender/form pie charts, then three
	tabs (Performance, Distribution, Miscellaneous). All figures and aggregates
	come from module-level values computed at import time; only the charts
	driven by a selectbox or multiselect are rebuilt here. Takes no argument
	and returns nothing; its effect is the Streamlit elements it emits.
	"""
	_render_kpis()
	_render_population_pies()

	tab1, tab2, tab3 = st.tabs([':orange[**Performance**]', ':orange[**Distribution**]', ':orange[**Miscelaneous**]'])

	_render_performance_tab(tab1)
	_render_distribution_tab(tab2)
	_render_misc_tab(tab3)