Delete Analyze-and-predict-student-scores
Browse files- Analyze-and-predict-student-scores/All_maj.csv +0 -3
- Analyze-and-predict-student-scores/R.png +0 -0
- Analyze-and-predict-student-scores/README.md +0 -2
- Analyze-and-predict-student-scores/__pycache__/function.cpython-310.pyc +0 -0
- Analyze-and-predict-student-scores/cols_to_drop.txt +0 -13
- Analyze-and-predict-student-scores/column_all.txt +0 -78
- Analyze-and-predict-student-scores/config.toml +0 -2
- Analyze-and-predict-student-scores/courses_list.txt +0 -42
- Analyze-and-predict-student-scores/dataScore.csv +0 -0
- Analyze-and-predict-student-scores/function.py +0 -253
- Analyze-and-predict-student-scores/main.py +0 -310
- Analyze-and-predict-student-scores/model/R_Late.joblib +0 -3
- Analyze-and-predict-student-scores/model/R_Sem.joblib +0 -3
- Analyze-and-predict-student-scores/model/R_rank.joblib +0 -3
- Analyze-and-predict-student-scores/requirements.txt +0 -8
- Analyze-and-predict-student-scores/rows_to_drop.txt +0 -15
Analyze-and-predict-student-scores/All_maj.csv
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:d9b5865503b770f70fe93a3a1b39e2d53036b45a7fa5c5a17cf226cf9bf0545f
|
3 |
-
size 47671628
|
|
|
|
|
|
|
|
Analyze-and-predict-student-scores/R.png
DELETED
Binary file (25.6 kB)
|
|
Analyze-and-predict-student-scores/README.md
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
# Analyze-and-predict-student-performance
|
2 |
-
Link to web-app: https://itdsiu19001-analyze-and-predict-student-performance-main-oiibq6.streamlit.app/
|
|
|
|
|
|
Analyze-and-predict-student-scores/__pycache__/function.cpython-310.pyc
DELETED
Binary file (7.73 kB)
|
|
Analyze-and-predict-student-scores/cols_to_drop.txt
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
Intensive English 0- Twinning Program
|
2 |
-
Intensive English 01- Twinning Program
|
3 |
-
Intensive English 02- Twinning Program
|
4 |
-
Intensive English 03- Twinning Program
|
5 |
-
Intensive English 1- Twinning Program
|
6 |
-
Intensive English 2- Twinning Program
|
7 |
-
Intensive English 3- Twinning Program
|
8 |
-
Listening & Speaking IE1
|
9 |
-
Listening & Speaking IE2
|
10 |
-
Listening & Speaking IE2 (for twinning program)
|
11 |
-
Reading & Writing IE1
|
12 |
-
Reading & Writing IE2
|
13 |
-
Reading & Writing IE2 (for twinning program)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analyze-and-predict-student-scores/column_all.txt
DELETED
@@ -1,78 +0,0 @@
|
|
1 |
-
Algorithms & Data Structures
|
2 |
-
Analytics for Observational Data
|
3 |
-
Applied Artificial Intelligence
|
4 |
-
Basic Electrical Concepts & Circuits
|
5 |
-
Blockchain
|
6 |
-
C/C++ Programming
|
7 |
-
C/C++ Programming in Unix
|
8 |
-
Circuit Analysis
|
9 |
-
Communication Networks
|
10 |
-
Computer Architecture
|
11 |
-
Computer Graphics
|
12 |
-
Computer Networks
|
13 |
-
Data Analysis
|
14 |
-
Data Science and Data Visualization
|
15 |
-
Decision Support System
|
16 |
-
Digital Communications
|
17 |
-
Digital Image Processing
|
18 |
-
Digital Logic Design
|
19 |
-
Digital Logic Design Laboratory
|
20 |
-
Digital Signal Processing
|
21 |
-
Discrete Mathematics
|
22 |
-
Electronic Devices & Circuits
|
23 |
-
Embedded Systems
|
24 |
-
Entrepreneurship
|
25 |
-
Formal Programming Methods
|
26 |
-
Functional Programming
|
27 |
-
Fundamental Concepts of Data Security
|
28 |
-
Fundamentals of Big Data Technology
|
29 |
-
Fundamentals of Programming
|
30 |
-
Human-Computer Interaction
|
31 |
-
IT Project Management
|
32 |
-
Information System Management
|
33 |
-
Information Theory & Coding
|
34 |
-
Internet of Things
|
35 |
-
Internship
|
36 |
-
Introduction to Artificial Intelligence
|
37 |
-
Introduction to Computing
|
38 |
-
Introduction to Data Mining
|
39 |
-
Introduction to Data Science
|
40 |
-
Introduction to Distributed Computing
|
41 |
-
Introduction to Wireless Network
|
42 |
-
Linear Algebra
|
43 |
-
Micro-processing Systems
|
44 |
-
Microprocessor Systems & Interfacing
|
45 |
-
Mobile Application Development
|
46 |
-
Net-Centric Programming
|
47 |
-
Network Design and Evaluation
|
48 |
-
Network Management and Protocols
|
49 |
-
Network Programming
|
50 |
-
Networks & Systems Security
|
51 |
-
Object Oriented Data Engineering (Java)
|
52 |
-
Object-Oriented Analysis and Design
|
53 |
-
Object-Oriented Programming
|
54 |
-
Operating Systems
|
55 |
-
Optimization
|
56 |
-
Principles of Database Management
|
57 |
-
Principles of EE1
|
58 |
-
Principles of EE1 Laboratory
|
59 |
-
Principles of Programming Languages
|
60 |
-
Probability, Statistic & Random Process
|
61 |
-
Programming Languages & Translators
|
62 |
-
Projects
|
63 |
-
Regression Analysis
|
64 |
-
Scalable and Distributed Computing
|
65 |
-
Signals & Systems
|
66 |
-
Signals & Systems Laboratory
|
67 |
-
Skills for Communicating Information
|
68 |
-
Software Architecture
|
69 |
-
Software Engineering
|
70 |
-
Software Implementation
|
71 |
-
Special Study of the Field
|
72 |
-
Statistical Methods
|
73 |
-
System & Network Administration
|
74 |
-
System and Network Security
|
75 |
-
Theoretical Models in Computing
|
76 |
-
Thesis
|
77 |
-
Web Application Development
|
78 |
-
Web Programming
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analyze-and-predict-student-scores/config.toml
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
[server]
|
2 |
-
headless = true
|
|
|
|
|
|
Analyze-and-predict-student-scores/courses_list.txt
DELETED
@@ -1,42 +0,0 @@
|
|
1 |
-
Calculus 1
|
2 |
-
Calculus 2
|
3 |
-
Calculus 3
|
4 |
-
Chemistry Laboratory
|
5 |
-
Chemistry for Engineers
|
6 |
-
Critical Thinking
|
7 |
-
History of Vietnamese Communist Party
|
8 |
-
Internship
|
9 |
-
Philosophy of Marxism and Leninism
|
10 |
-
Physics 1
|
11 |
-
Physics 2
|
12 |
-
Physics 3
|
13 |
-
Physics 3 Laboratory
|
14 |
-
Physics 4
|
15 |
-
Political economics of Marxism and Leninism
|
16 |
-
Principles of Database Management
|
17 |
-
Principles of Marxism
|
18 |
-
Principles of Programming Languages
|
19 |
-
Probability, Statistic & Random Process
|
20 |
-
Regression Analysis
|
21 |
-
Revolutionary Lines of Vietnamese Communist Party
|
22 |
-
Scientific socialism
|
23 |
-
Speaking AE2
|
24 |
-
Special Study of the Field
|
25 |
-
Thesis
|
26 |
-
Writing AE1
|
27 |
-
Writing AE2
|
28 |
-
Intensive English 0- Twinning Program
|
29 |
-
Intensive English 01- Twinning Program
|
30 |
-
Intensive English 02- Twinning Program
|
31 |
-
Intensive English 03- Twinning Program
|
32 |
-
Intensive English 1- Twinning Program
|
33 |
-
Intensive English 2- Twinning Program
|
34 |
-
Intensive English 3- Twinning Program
|
35 |
-
Listening & Speaking IE1
|
36 |
-
Listening & Speaking IE2
|
37 |
-
Listening & Speaking IE2 (for twinning program)
|
38 |
-
Physical Training 1
|
39 |
-
Physical Training 2
|
40 |
-
Reading & Writing IE1
|
41 |
-
Reading & Writing IE2
|
42 |
-
Reading & Writing IE2 (for twinning program)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analyze-and-predict-student-scores/dataScore.csv
DELETED
The diff for this file is too large to render.
See raw diff
|
|
Analyze-and-predict-student-scores/function.py
DELETED
@@ -1,253 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
import numpy as np
|
3 |
-
import plotly.express as px
|
4 |
-
import plotly.graph_objs as go
|
5 |
-
import streamlit as st
|
6 |
-
import joblib
|
7 |
-
|
8 |
-
|
9 |
-
def get_year(student_id):
    """Return the two-digit year field embedded in a student ID.

    The field is read from character positions 6 and 7 (0-based) of
    ``student_id``; for example ``"ITITIU19001"`` yields ``19``.  Callers in
    this module add 2000 to the result to obtain a four-digit year.

    Args:
        student_id: Student ID string (any sliceable sequence of characters).

    Returns:
        int: The integer value of ``student_id[6:8]``.

    Raises:
        ValueError: If ``student_id[6:8]`` is empty or not a decimal number
            (e.g. the ID is too short).
    """
    return int(student_id[6:8])
|
11 |
-
|
12 |
-
def process_data(raw_data):
    """Build the wide per-student score table used by the dashboard.

    The long-format ``raw_data`` (columns ``MaSV``, ``TenMH``, ``DiemHP``) is
    pivoted to one row per ``MaSV`` and one column per ``TenMH``.  Columns with
    fewer than 50 non-missing entries are discarded, column names are stripped
    of surrounding whitespace, and every column listed in ``cols_to_drop.txt``
    (read from the current working directory) is removed.

    All remaining score columns are converted to numeric after the markers
    ``'WH'``, ``'VT'`` and ``'I'`` have been replaced by NaN.  (The previous
    implementation skipped the last score column and assigned through
    ``iloc``, which does not reliably change the column dtype.)

    Three descriptive columns derived from ``MaSV`` are appended, in this
    order, as the LAST three columns -- callers rely on that position:

    * ``MaSV_school`` -- characters 2-3 of the ID
    * ``Major``       -- characters 0-1 of the ID
    * ``Year``        -- ``2000 + get_year(MaSV)`` as a string

    Args:
        raw_data: DataFrame with at least ``MaSV``, ``TenMH`` and ``DiemHP``.
            ``XepLoaiNH`` is no longer required: it was merged in and then
            dropped again without influencing the result.

    Returns:
        pandas.DataFrame: score columns followed by ``MaSV_school``, ``Major``
        and ``Year``; the ``MaSV`` column itself is not included.

    Raises:
        FileNotFoundError: If ``cols_to_drop.txt`` is missing.
        ValueError: If a score cell holds a non-numeric value other than the
            three markers above.
    """
    # One row per student, one column per course; when a student has several
    # rows for the same course, the first DiemHP encountered is kept.
    pivot_df = pd.pivot_table(
        raw_data, values='DiemHP', index='MaSV', columns='TenMH', aggfunc='first'
    )
    pivot_df = pivot_df.reset_index()
    pivot_df.columns.name = None
    # Keep only columns with at least 50 non-missing values.
    pivot_df = pivot_df.dropna(thresh=50, axis=1)
    pivot_df = pivot_df.rename(columns=lambda x: x.strip())

    # Drop the columns named in cols_to_drop.txt (one name per line).
    with open('cols_to_drop.txt', 'r') as f:
        cols_to_drop = [line.strip() for line in f]
    pivot_df = pivot_df.drop(
        columns=[col for col in cols_to_drop if col in pivot_df.columns]
    )

    student_ids = pivot_df['MaSV']

    # Convert EVERY score column; build a new frame so dtypes really change.
    df = pivot_df.drop(columns='MaSV')
    df = df.replace(['WH', 'VT', 'I'], np.nan).apply(pd.to_numeric)

    # Descriptive columns must stay last (the dashboard slices columns[:-3]).
    df['MaSV_school'] = student_ids.str.slice(2, 4)
    df['Major'] = student_ids.str.slice(0, 2)
    df['Year'] = (2000 + student_ids.apply(get_year)).astype(str)

    return df
|
43 |
-
|
44 |
-
def process_data_per(raw_data):
    """Build the wide per-student score table, keeping the ``MaSV`` column.

    The long-format ``raw_data`` (columns ``MaSV``, ``TenMH``, ``DiemHP``) is
    pivoted to one row per ``MaSV`` and one column per ``TenMH``.  Columns with
    fewer than 50 non-missing entries are discarded, column names are stripped
    of surrounding whitespace, and every column listed in ``cols_to_drop.txt``
    (read from the current working directory) is removed.

    Score columns are converted to numeric after the markers ``'WH'``,
    ``'VT'`` and ``'I'`` have been replaced by NaN.  Previously only ``'WH'``
    was handled here, so a ``'VT'`` or ``'I'`` entry made the conversion
    raise; the marker list now matches ``process_data``.

    Args:
        raw_data: DataFrame with at least ``MaSV``, ``TenMH`` and ``DiemHP``.
            ``XepLoaiNH`` is no longer required: it was merged in and then
            dropped again without influencing the result.

    Returns:
        pandas.DataFrame: ``MaSV`` as the first column followed by the numeric
        score columns, one row per student.

    Raises:
        FileNotFoundError: If ``cols_to_drop.txt`` is missing.
        ValueError: If a score cell holds a non-numeric value other than the
            three markers above.
    """
    # One row per student, one column per course; when a student has several
    # rows for the same course, the first DiemHP encountered is kept.
    pivot_df = pd.pivot_table(
        raw_data, values='DiemHP', index='MaSV', columns='TenMH', aggfunc='first'
    )
    pivot_df = pivot_df.reset_index()
    pivot_df.columns.name = None
    # Keep only columns with at least 50 non-missing values.
    pivot_df = pivot_df.dropna(thresh=50, axis=1)
    pivot_df = pivot_df.rename(columns=lambda x: x.strip())

    # Drop the columns named in cols_to_drop.txt (one name per line).
    with open('cols_to_drop.txt', 'r') as f:
        cols_to_drop = [line.strip() for line in f]
    pivot_df = pivot_df.drop(
        columns=[col for col in cols_to_drop if col in pivot_df.columns]
    )

    # Build fresh numeric columns instead of assigning through iloc, which
    # does not reliably change the dtype of the existing object columns.
    scores = pivot_df.drop(columns='MaSV')
    scores = scores.replace(['WH', 'VT', 'I'], np.nan).apply(pd.to_numeric)

    return pd.concat([pivot_df[['MaSV']], scores], axis=1)
|
68 |
-
|
69 |
-
|
70 |
-
def process_predict_data(raw_data):
    """Derive the per-student feature table fed to the prediction models.

    Output columns (in this order):

    * ``MaSV`` -- student ID.
    * ``GPA`` -- the ``DTBTKH4`` value from the student's last row.
    * ``Mean_Cre`` -- MEDIAN (despite the name) of ``SoTCDat`` over the
      student's distinct ``(NHHK, SoTCDat)`` pairs, rounded to 2 decimals.
    * ``fail_courses_list_count`` -- number of course codes starting with
      ``IT`` that appear at least twice for the student.
    * ``fail_not_courses_list_count`` -- number of other course codes that
      appear at least twice for the student.
    * ``EPeriod`` -- number of distinct ``NHHK`` values in which the student
      has a course whose code starts with ``EN``, excluding codes containing
      EN007, EN008, EN011 or EN012; 0 when there are none.

    Rows whose ``MaMH`` is missing are ignored by all course-code tests
    (previously a missing code made the boolean mask raise).

    Args:
        raw_data: DataFrame with columns ``MaSV``, ``MaMH``, ``NHHK``,
            ``SoTCDat`` and ``DTBTKH4``.

    Returns:
        pandas.DataFrame: one row per student with the columns listed above.
    """
    # GPA: keep the last row seen for every student.
    gpa = raw_data[["MaSV", "DTBTKH4"]].drop_duplicates(subset="MaSV", keep="last")

    # How many times each (student, course code) pair occurs.
    attempts = raw_data.groupby(["MaSV", "MaMH"]).size().reset_index(name="Times")
    repeated = attempts["Times"] >= 2
    is_it_course = attempts["MaMH"].str.startswith("IT", na=False)
    attempts["fail_courses_list_count"] = (is_it_course & repeated).astype(int)
    attempts["fail_not_courses_list_count"] = (~is_it_course & repeated).astype(int)

    fail = (
        attempts.groupby("MaSV")[["fail_courses_list_count", "fail_not_courses_list_count"]]
        .sum()
        .reset_index()
    )

    df = pd.merge(gpa, fail, on="MaSV").rename(columns={"DTBTKH4": "GPA"})

    # Median of the distinct per-period SoTCDat values of each student.
    credits = raw_data[["MaSV", "NHHK", "SoTCDat"]].drop_duplicates()
    credits = (
        credits.groupby("MaSV")["SoTCDat"].median().reset_index(name="Mean_Cre").round(2)
    )
    df = pd.merge(df, credits, on="MaSV")

    # Distinct periods containing an EN* course (four codes excluded).
    codes = raw_data["MaMH"]
    is_english = codes.str.startswith("EN", na=False) & ~codes.str.contains(
        "EN007|EN008|EN011|EN012", na=False
    )
    english_periods = (
        raw_data.loc[is_english].groupby("MaSV")["NHHK"].nunique().reset_index(name="EPeriod")
    )

    # NOTE(review): fillna(0) zero-fills every column of the merged frame, not
    # only EPeriod -- a missing GPA also becomes 0.  Kept as in the original.
    df = pd.merge(df, english_periods, on="MaSV", how="left").fillna(0)

    return df[
        [
            "MaSV",
            "GPA",
            "Mean_Cre",
            "fail_courses_list_count",
            "fail_not_courses_list_count",
            "EPeriod",
        ]
    ]
|
116 |
-
|
117 |
-
def predict_late_student(test_df):
    """Run the two pre-trained models on every student in ``test_df``.

    The features produced by ``process_predict_data`` (without the ID column)
    are passed to the models stored in ``model/R_Late.joblib`` and
    ``model/R_Sem.joblib``.

    The returned frame contains ``MaSV`` first, then the feature columns, then:

    * ``Period`` -- the ``R_Sem`` prediction divided by 2.
    * ``Result`` -- ``'late'`` where the ``R_Late`` prediction equals 1,
      otherwise ``'not late'``; a ``'late'`` row whose un-halved ``R_Sem``
      prediction is <= 9 is relabelled ``'may late'``.

    Args:
        test_df: Raw long-format data accepted by ``process_predict_data``.

    Returns:
        pandas.DataFrame: one row per student as described above.
    """
    # Load the pre-trained models.
    late_model = joblib.load("model/R_Late.joblib")
    period_model = joblib.load("model/R_Sem.joblib")

    features = process_predict_data(test_df)

    # The first column is the student ID; the models only see the rest.
    student_ids = features.iloc[:, 0]
    features = features.drop(features.columns[0], axis=1)

    late_flags = late_model.predict(features)
    periods = period_model.predict(features)

    result = features.copy()
    result['Period'] = periods
    result['Result'] = ['late' if flag == 1 else 'not late' for flag in late_flags]
    result.insert(0, 'MaSV', student_ids)

    # Relabel using the raw (un-halved) Period, then halve Period for all rows.
    # Whole-column operations replace the former row-by-row .loc writes.
    may_be_late = (result['Period'] <= 9) & (result['Result'] == 'late')
    result.loc[may_be_late, 'Result'] = 'may late'
    result['Period'] = result['Period'] / 2

    return result
|
153 |
-
def predict_rank(raw_data):
    """Predict a rank label for every student whose ID starts with ``IT``.

    Only rows whose ``MaSV`` and ``MaMH`` both start with ``IT`` are used.
    They are pivoted to one row per student / one column per ``TenMH``;
    columns with fewer than 50 non-missing values are dropped and names are
    stripped.  The table is then aligned to the column names listed in
    ``column_all.txt`` (read from the current working directory), every
    missing score is filled with the student's ``DTBTK`` value, columns are
    sorted by name, and the model in ``model/R_rank.joblib`` is applied.

    Changes relative to the previous implementation:

    * Missing scores were filled with a chained
      ``df[col].fillna(..., inplace=True)``, which does not modify ``df``
      under pandas Copy-on-Write; columns are now reassigned.
    * Alignment to ``column_all.txt`` uses ``reindex`` instead of an outer
      merge with an empty frame.  Course columns that are not listed in the
      file are dropped rather than raising ``KeyError``.
    * ``'VT'`` and ``'I'`` are treated as missing in addition to ``'WH'``.

    Args:
        raw_data: DataFrame with columns ``MaSV``, ``MaMH``, ``TenMH``,
            ``DiemHP`` and ``DTBTK``.

    Returns:
        pandas.DataFrame: columns ``MaSV`` and ``Pred Rank``.
    """
    it_rows = raw_data[raw_data["MaSV"].str.startswith("IT", na=False)]
    it_rows = it_rows[it_rows["MaMH"].str.startswith("IT", na=False)]

    pivot_df = pd.pivot_table(
        it_rows, values="DiemHP", index="MaSV", columns="TenMH", aggfunc="first"
    )
    pivot_df = pivot_df.reset_index()
    pivot_df.columns.name = None
    pivot_df = pivot_df.dropna(thresh=50, axis=1)
    pivot_df = pivot_df.rename(columns=lambda x: x.strip())

    scores = pivot_df.drop(columns="MaSV")
    scores = scores.replace(["WH", "VT", "I"], np.nan).apply(pd.to_numeric)

    # DTBTK from each student's last row, aligned with the pivot's rows.
    latest_dtbtk = it_rows.drop_duplicates(subset="MaSV", keep="last").set_index("MaSV")["DTBTK"]
    fallback = pivot_df["MaSV"].map(latest_dtbtk)

    # Feature columns expected by the model, one name per line.
    with open('column_all.txt', 'r') as f:
        columns_data = [line.strip() for line in f]

    features = scores.reindex(columns=columns_data)
    # Fill every missing score with the student's own DTBTK (index-aligned).
    features = features.apply(lambda column: column.fillna(fallback))
    features = features.sort_index(axis=1)

    model = joblib.load("model/R_rank.joblib")
    prediction = model.predict(features)

    return pd.DataFrame({'MaSV': pivot_df['MaSV'].to_numpy(), 'Pred Rank': prediction})
|
195 |
-
|
196 |
-
|
197 |
-
def predict_one_student(raw_data, student_id):
    """Render two Streamlit charts for a single student.

    Left column: a histogram of the student's non-missing scores taken from
    ``process_data_per(raw_data)``.  Right column: a bar chart of ``DiemHP``
    per ``TenMH`` coloured by ``NHHK``, with a red horizontal line at 50.
    Courses named in ``rows_to_drop.txt`` (read from the current working
    directory) are excluded from the bar chart.

    If the student is absent from the processed table, a "No data found"
    message is written instead.

    Changes relative to the previous implementation: the raw rows are
    filtered to the student first and copied before new columns are assigned
    (no writes into a slice of ``raw_data``); the red line now ends at the
    last x category (number of distinct course names) instead of at the
    number of rows; the unused ``colname`` local is gone.

    Args:
        raw_data: Long-format DataFrame with ``MaSV``, ``NHHK``, ``TenMH``
            and ``DiemHP`` (plus whatever ``process_data_per`` needs).
        student_id: The ``MaSV`` value to display.

    Returns:
        None.
    """
    student = process_data_per(raw_data)
    filtered_df = student[student["MaSV"] == student_id]
    if len(filtered_df) == 0:
        st.write("No data found for student {}".format(student_id))
        return

    # Scores of the first matching row, skipping the MaSV column and NaNs.
    values = filtered_df.iloc[0, 1:].dropna().values.tolist()

    # Histogram of the student's scores.
    fig1 = go.Figure()
    fig1.add_trace(
        go.Histogram(
            x=values,
            nbinsx=40,
            name=student_id,
            marker=dict(color='rgba(50, 100, 200, 0.7)'),
        )
    )
    fig1.update_layout(
        title="Histogram for student {}".format(student_id),
        xaxis_title="Value",
        yaxis_title="Frequency",
        width=500
    )

    # Bar chart of score per course, coloured by period.
    student_data = raw_data.loc[
        raw_data['MaSV'] == student_id, ['NHHK', 'TenMH', 'DiemHP']
    ].copy()
    student_data['TenMH'] = student_data['TenMH'].str.lstrip()
    # Insert ' S ' after the first four characters of the period code.
    student_data['NHHK'] = student_data['NHHK'].apply(
        lambda x: str(x)[:4] + ' S ' + str(x)[4:]
    )
    with open('rows_to_drop.txt', 'r') as f:
        rows_to_drop = [line.strip() for line in f]
    student_data = student_data[~student_data['TenMH'].isin(rows_to_drop)].copy()
    student_data['DiemHP'] = pd.to_numeric(student_data['DiemHP'], errors='coerce')

    fig2 = px.bar(student_data, x='TenMH', y='DiemHP', color='NHHK', title='Student Score vs. Course')
    fig2.update_layout(
        title="Student Score vs. Course",
        xaxis_title=None,
        yaxis_title="Score",
    )
    # On a category axis, shape x-coordinates are category serial numbers
    # starting at 0, so the last category is (number of distinct names - 1).
    last_category = max(student_data['TenMH'].nunique() - 1, 0)
    fig2.add_shape(
        type="line",
        x0=0,
        y0=50,
        x1=last_category,
        y1=50,
        line=dict(color='red', width=3)
    )

    # Show both charts side by side.
    col1, col2 = st.columns(2)
    with col1:
        st.plotly_chart(fig1)
    with col2:
        st.plotly_chart(fig2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analyze-and-predict-student-scores/main.py
DELETED
@@ -1,310 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
import streamlit as st
|
3 |
-
import plotly.express as px
|
4 |
-
import numpy as np
|
5 |
-
import plotly.graph_objs as go
|
6 |
-
from function import process_data,predict_late_student, predict_rank,predict_one_student
|
7 |
-
from datetime import datetime
|
8 |
-
from PIL import Image
|
9 |
-
import base64
|
10 |
-
from io import BytesIO
|
11 |
-
|
12 |
-
|
13 |
-
df = pd.DataFrame()
|
14 |
-
|
15 |
-
|
16 |
-
def color_cell(val):
    """Return the CSS ``color`` declaration for one table cell.

    ``"not late"`` -> green, ``"may late"`` -> yellow, ``"late"`` -> red;
    every other value -> black.

    Args:
        val: The cell value (must be hashable, as scalar cell values are).

    Returns:
        str: A string of the form ``"color: <name>"``.
    """
    palette = {
        "not late": "green",
        "may late": "yellow",
        "late": "red",
    }
    return "color: %s" % palette.get(val, "black")
|
26 |
-
|
27 |
-
|
28 |
-
def get_year(student_id):
    """Return the two-digit year field embedded in a student ID.

    The field is read from character positions 6 and 7 (0-based) of
    ``student_id``; for example ``"ITITIU19001"`` yields ``19``.  The caller
    in this script adds 2000 to obtain a four-digit year.

    Args:
        student_id: Student ID string.

    Returns:
        int: The integer value of ``student_id[6:8]``.

    Raises:
        ValueError: If ``student_id[6:8]`` is empty or not a decimal number
            (e.g. the ID is too short).
    """
    return int(student_id[6:8])
|
30 |
-
|
31 |
-
|
32 |
-
def generate_comment(median, course=None):
    """Return a one-sentence remark about a course's median score.

    Thresholds: below 30 "quite low", below 50 "below average", below 80
    "solid", otherwise "outstanding".  A missing (NaN) median -- e.g. the
    median of an empty selection -- yields a "no scores" sentence instead of
    falling through to "outstanding".

    Args:
        median: The median score; may be NaN.
        course: Course name used in the sentence.  When omitted, the
            module-level ``course`` variable is used if it exists (this is
            what the function always did implicitly), otherwise the literal
            ``"this course"``.

    Returns:
        str: The remark.
    """
    if course is None:
        # Backward compatibility: callers that pass only the median rely on
        # the script-level `course` selected in the dashboard.
        course = globals().get("course", "this course")

    if pd.isna(median):
        return f"No scores are available for {course}."

    if median < 30:
        comment = f"The median score for {course} is quite low at {median}. Students may need to work harder to improve their performance."
    elif median < 50:
        comment = f"The median score for {course} is below average at {median}. Students should work on improving their understanding of the material."
    elif median < 80:
        comment = f"The median score for {course} is solid at {median}. Students are making good progress but could still work on improving their skills."
    else:
        comment = f"The median score for {course} is outstanding at {median}. Students are doing an excellent job in this course."
    return comment
|
42 |
-
|
43 |
-
# ---------------------------------------------------------------------------
# Page setup: configuration, header row, data loading and sidebar selector.
# ---------------------------------------------------------------------------
favicon = 'R.png'

st.set_page_config(
    page_title='Student System',
    page_icon=favicon,
    layout='wide',
)
currentYear = datetime.now().year
im1 = Image.open("R.png")

# Header row: logo in a narrow left column, title in a wider right column.
col1, col2 = st.columns([1, 3])

with col1:
    st.image(im1, width=150)

with col2:
    st.title("Student Performance Prediction System")

# Load the raw data.
# The file-upload path below is disabled; data is read from a bundled CSV.
# uploaded_file = st.file_uploader("Choose a score file", type=["xlsx", "csv"])

# if uploaded_file is not None:
#     file_contents = uploaded_file.read()
#     file_ext = uploaded_file.name.split(".")[-1].lower()  # Get the file extension

#     if file_ext == "csv":
#         df = pd.read_csv(BytesIO(file_contents))
#     elif file_ext in ["xls", "xlsx"]:
#         df = pd.read_excel(BytesIO(file_contents))
#     else:
#         st.error("Invalid file format. Please upload a CSV or Excel file.")

# raw_data = df.copy()
# NOTE(review): the project directory listing shows "All_maj.csv", not
# "All_major.csv" -- confirm the file name; a mismatch raises
# FileNotFoundError here, before anything is rendered.
raw_data = pd.read_csv("All_major.csv")
st.sidebar.title("Analysis Tool")

option = ["Dashboard", "Predict"]
# Sidebar drop-down choosing which page to show.
tabs = st.sidebar.selectbox("Select an option", option)
|
90 |
-
|
91 |
-
|
92 |
-
# draw histogram
|
93 |
-
# Streamlit app
|
94 |
-
def _filter_equal(frame, column, choice):
    """Return a copy of ``frame`` for "All"; otherwise the rows where
    ``column == choice`` with columns that became entirely empty removed."""
    if choice == "All":
        return frame.copy()
    return frame[frame[column] == choice].dropna(axis=1, how="all")


def _centered_pie(frame, names, palette):
    """Build a 400x400 pie chart of ``frame[names]`` with a centred title."""
    fig = px.pie(
        frame,
        names=names,
        title=names,
        color_discrete_sequence=palette,
        height=400,
        width=400,
    )
    fig.update_layout(
        title={
            "text": names,
            "y": 0.95,
            "x": 0.5,
            "xanchor": "center",
            "yanchor": "top",
        }
    )
    return fig


if tabs == "Dashboard":
    df = process_data(raw_data)

    # Successive filters: major -> school -> year.  The major selector offers
    # no "All" entry, so a concrete major is always selected.
    major = st.selectbox("Select a major:", df["Major"].unique())
    df = _filter_equal(df, "Major", major)

    school = st.selectbox(
        "Select a school:", np.concatenate([["All"], df["MaSV_school"].unique()])
    )
    df = _filter_equal(df, "MaSV_school", school)

    year = st.selectbox(
        "Select a year:", np.concatenate([["All"], df["Year"].unique()])
    )
    df = _filter_equal(df, "Year", year)

    # The last three columns are MaSV_school, Major and Year; the rest are courses.
    options = df.columns[:-3]
    course = st.selectbox("Select a course:", options)

    # Scores for the selected course.
    course_data = df[course].dropna()

    st.write(generate_comment(course_data.median()))
    st.write("Course:", course, " of ", school, " student")

    col1, col2, col3 = st.columns(3)

    with col1:
        fig = go.Figure()
        fig.add_trace(go.Histogram(x=course_data, nbinsx=40, name="Histogram"))
        fig.update_layout(
            title="Histogram of Scores for {}".format(course),
            xaxis_title="Score",
            yaxis_title="Count",
            height=400,
            width=400
        )
        st.plotly_chart(fig)

    with col2:
        fig = go.Figure()
        fig.add_trace(go.Box(y=course_data, name="Box plot"))
        fig.update_layout(
            title="Box plot of Scores for {}".format(course),
            yaxis_title="Score",
            height=400,
            width=400
        )
        st.plotly_chart(fig)

    with col3:
        # Mean score per period for the selected course; only the school
        # filter is applied to the raw rows here.
        raw_data['MaSV_school'] = raw_data['MaSV'].str.slice(2, 4)
        if school == "All":
            data = raw_data
        else:
            data = raw_data[raw_data["MaSV_school"] == school]
        df1 = data[['TenMH', 'NHHK', 'DiemHP']].copy()
        df1['DiemHP'] = pd.to_numeric(df1['DiemHP'], errors='coerce')
        df1['NHHK'] = df1['NHHK'].apply(lambda x: str(x)[:4] + ' S ' + str(x)[4:])
        # Course names were stripped when the wide table was built, so compare
        # stripped names rather than assuming exactly one leading space.
        filtered_df1 = df1[df1['TenMH'].str.strip() == course]
        mean_DiemHP = (
            filtered_df1.groupby('NHHK')['DiemHP'].mean().round(1).reset_index(name='Mean')
        )
        fig = px.line(
            mean_DiemHP, x='NHHK', y='Mean',
            title=f"Mean DiemHP for {course} through periods",
        )
        fig.update_layout(height=400, width=400)
        st.plotly_chart(fig)

elif tabs == "Predict":
    try:
        raw_data = pd.read_csv("dataScore.csv")
        predict = predict_late_student(raw_data)
        rank = predict_rank(raw_data)

        predict = pd.merge(predict, rank, on="MaSV")
        rank_mapping = {
            "Khá": "Good",
            "Trung Bình Khá": "Average good",
            "Giỏi": "Very good",
            "Kém": "Very weak",
            "Trung Bình": "Ordinary",
            "Yếu": "Weak",
            "Xuất Sắc": "Excellent",
        }
        # Reassign instead of a chained inplace replace, which does not
        # modify the frame under pandas Copy-on-Write.
        predict["Pred Rank"] = predict["Pred Rank"].replace(rank_mapping)

        MaSV = st.text_input("Enter Student ID:")
        if MaSV:
            # Single-student view.
            df_filtered = predict[predict["MaSV"] == MaSV]
            styled_table = (
                df_filtered[["MaSV", "GPA", "Mean_Cre", "Pred Rank", "Result", "Period"]]
                .style.applymap(color_cell)
                .format({"GPA": "{:.2f}", "Mean_Cre": "{:.1f}", "Period": "{:.1f}"})
            )

            with st.container():
                st.write(styled_table)
            predict_one_student(raw_data, MaSV)
        else:
            # Cohort view: exclude the two years preceding the current year.
            df_late = predict.copy()
            df_late["Year"] = 2000 + df_late["MaSV"].apply(get_year)
            df_late = df_late[
                ~df_late["Year"].isin([currentYear - 1, currentYear - 2])
            ]
            year = st.selectbox("Select Year", options=df_late["Year"].unique())
            df_filtered = df_late[df_late["Year"] == year]
            styled_table = (
                df_filtered[["MaSV", "GPA", "Mean_Cre", "Pred Rank", "Result", "Period"]]
                .style.applymap(color_cell)
                .format({"GPA": "{:.2f}", "Mean_Cre": "{:.2f}", "Period": "{:.2f}"})
            )

            # Offer the filtered table as a CSV download via a data URI.
            csv = df_filtered.to_csv(index=False)
            b64 = base64.b64encode(csv.encode()).decode()
            href = f'<a href="data:text/csv;base64,{b64}" download="Predict data.csv">Download CSV</a>'
            st.markdown(href, unsafe_allow_html=True)

            fig1 = _centered_pie(df_filtered, "Pred Rank", px.colors.sequential.Mint)
            fig2 = _centered_pie(df_filtered, "Result", px.colors.sequential.Peach)

            st.dataframe(styled_table)
            col1, col2 = st.columns([1, 1])
            with col1:
                st.plotly_chart(fig1)
            with col2:
                st.plotly_chart(fig2)

    except FileNotFoundError:
        # A data, model or list file is missing.
        st.write('Add CSV to analysis')
    except Exception as exc:
        # Keep the page alive, but surface the real error instead of hiding
        # every failure behind the same message.
        st.write('Add CSV to analysis')
        st.exception(exc)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analyze-and-predict-student-scores/model/R_Late.joblib
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:d201e84514a400d73d79097b43fabf12cc96923e7abb1bc5c3be22bc5dea7445
|
3 |
-
size 497289
|
|
|
|
|
|
|
|
Analyze-and-predict-student-scores/model/R_Sem.joblib
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:adfbee871506a3a7e6e3ca02d7bd205cceab50fdbec47878d01773ed59dd5e7c
|
3 |
-
size 2638353
|
|
|
|
|
|
|
|
Analyze-and-predict-student-scores/model/R_rank.joblib
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:89b2a11b69b622db9e2401c735d0bb0b4a5f791269de30c9799a0817f619cd96
|
3 |
-
size 205089
|
|
|
|
|
|
|
|
Analyze-and-predict-student-scores/requirements.txt
DELETED
@@ -1,8 +0,0 @@
|
|
1 |
-
numpy
|
2 |
-
Cython==0.29.21
|
3 |
-
scikit-learn
|
4 |
-
pandas
|
5 |
-
plotly
|
6 |
-
scipy
|
7 |
-
pyDOE
|
8 |
-
openpyxl
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Analyze-and-predict-student-scores/rows_to_drop.txt
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
Intensive English 0- Twinning Program
|
2 |
-
Intensive English 01- Twinning Program
|
3 |
-
Intensive English 02- Twinning Program
|
4 |
-
Intensive English 03- Twinning Program
|
5 |
-
Intensive English 1- Twinning Program
|
6 |
-
Intensive English 2- Twinning Program
|
7 |
-
Intensive English 3- Twinning Program
|
8 |
-
Listening & Speaking IE1
|
9 |
-
Listening & Speaking IE2
|
10 |
-
Listening & Speaking IE2 (for twinning program)
|
11 |
-
Physical Training 1
|
12 |
-
Physical Training 2
|
13 |
-
Reading & Writing IE1
|
14 |
-
Reading & Writing IE2
|
15 |
-
Reading & Writing IE2 (for twinning program)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|