Upload 16 files
Browse files- .gitattributes +1 -0
- All_maj.csv +3 -0
- R.png +0 -0
- README.md +2 -13
- __pycache__/function.cpython-310.pyc +0 -0
- cols_to_drop.txt +13 -0
- column_all.txt +78 -0
- config.toml +2 -0
- courses_list.txt +42 -0
- dataScore.csv +0 -0
- function.py +253 -0
- main.py +310 -0
- model/R_Late.joblib +3 -0
- model/R_Sem.joblib +3 -0
- model/R_rank.joblib +3 -0
- requirements.txt +8 -0
- rows_to_drop.txt +15 -0
.gitattributes
CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
Analyze-and-predict-student-scores/All_maj.csv filter=lfs diff=lfs merge=lfs -text
|
|
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
Analyze-and-predict-student-scores/All_maj.csv filter=lfs diff=lfs merge=lfs -text
|
37 |
+
All_maj.csv filter=lfs diff=lfs merge=lfs -text
|
All_maj.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9b5865503b770f70fe93a3a1b39e2d53036b45a7fa5c5a17cf226cf9bf0545f
|
3 |
+
size 47671628
|
R.png
ADDED
![]() |
README.md
CHANGED
@@ -1,13 +1,2 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
emoji: 🏢
|
4 |
-
colorFrom: pink
|
5 |
-
colorTo: gray
|
6 |
-
sdk: streamlit
|
7 |
-
sdk_version: 1.21.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
license: other
|
11 |
-
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
# Analyze-and-predict-student-performance
|
2 |
+
Link to web-app: https://itdsiu19001-analyze-and-predict-student-performance-main-oiibq6.streamlit.app/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__pycache__/function.cpython-310.pyc
ADDED
Binary file (7.73 kB). View file
|
|
cols_to_drop.txt
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Intensive English 0- Twinning Program
|
2 |
+
Intensive English 01- Twinning Program
|
3 |
+
Intensive English 02- Twinning Program
|
4 |
+
Intensive English 03- Twinning Program
|
5 |
+
Intensive English 1- Twinning Program
|
6 |
+
Intensive English 2- Twinning Program
|
7 |
+
Intensive English 3- Twinning Program
|
8 |
+
Listening & Speaking IE1
|
9 |
+
Listening & Speaking IE2
|
10 |
+
Listening & Speaking IE2 (for twinning program)
|
11 |
+
Reading & Writing IE1
|
12 |
+
Reading & Writing IE2
|
13 |
+
Reading & Writing IE2 (for twinning program)
|
column_all.txt
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Algorithms & Data Structures
|
2 |
+
Analytics for Observational Data
|
3 |
+
Applied Artificial Intelligence
|
4 |
+
Basic Electrical Concepts & Circuits
|
5 |
+
Blockchain
|
6 |
+
C/C++ Programming
|
7 |
+
C/C++ Programming in Unix
|
8 |
+
Circuit Analysis
|
9 |
+
Communication Networks
|
10 |
+
Computer Architecture
|
11 |
+
Computer Graphics
|
12 |
+
Computer Networks
|
13 |
+
Data Analysis
|
14 |
+
Data Science and Data Visualization
|
15 |
+
Decision Support System
|
16 |
+
Digital Communications
|
17 |
+
Digital Image Processing
|
18 |
+
Digital Logic Design
|
19 |
+
Digital Logic Design Laboratory
|
20 |
+
Digital Signal Processing
|
21 |
+
Discrete Mathematics
|
22 |
+
Electronic Devices & Circuits
|
23 |
+
Embedded Systems
|
24 |
+
Entrepreneurship
|
25 |
+
Formal Programming Methods
|
26 |
+
Functional Programming
|
27 |
+
Fundamental Concepts of Data Security
|
28 |
+
Fundamentals of Big Data Technology
|
29 |
+
Fundamentals of Programming
|
30 |
+
Human-Computer Interaction
|
31 |
+
IT Project Management
|
32 |
+
Information System Management
|
33 |
+
Information Theory & Coding
|
34 |
+
Internet of Things
|
35 |
+
Internship
|
36 |
+
Introduction to Artificial Intelligence
|
37 |
+
Introduction to Computing
|
38 |
+
Introduction to Data Mining
|
39 |
+
Introduction to Data Science
|
40 |
+
Introduction to Distributed Computing
|
41 |
+
Introduction to Wireless Network
|
42 |
+
Linear Algebra
|
43 |
+
Micro-processing Systems
|
44 |
+
Microprocessor Systems & Interfacing
|
45 |
+
Mobile Application Development
|
46 |
+
Net-Centric Programming
|
47 |
+
Network Design and Evaluation
|
48 |
+
Network Management and Protocols
|
49 |
+
Network Programming
|
50 |
+
Networks & Systems Security
|
51 |
+
Object Oriented Data Engineering (Java)
|
52 |
+
Object-Oriented Analysis and Design
|
53 |
+
Object-Oriented Programming
|
54 |
+
Operating Systems
|
55 |
+
Optimization
|
56 |
+
Principles of Database Management
|
57 |
+
Principles of EE1
|
58 |
+
Principles of EE1 Laboratory
|
59 |
+
Principles of Programming Languages
|
60 |
+
Probability, Statistic & Random Process
|
61 |
+
Programming Languages & Translators
|
62 |
+
Projects
|
63 |
+
Regression Analysis
|
64 |
+
Scalable and Distributed Computing
|
65 |
+
Signals & Systems
|
66 |
+
Signals & Systems Laboratory
|
67 |
+
Skills for Communicating Information
|
68 |
+
Software Architecture
|
69 |
+
Software Engineering
|
70 |
+
Software Implementation
|
71 |
+
Special Study of the Field
|
72 |
+
Statistical Methods
|
73 |
+
System & Network Administration
|
74 |
+
System and Network Security
|
75 |
+
Theoretical Models in Computing
|
76 |
+
Thesis
|
77 |
+
Web Application Development
|
78 |
+
Web Programming
|
config.toml
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
[server]
|
2 |
+
headless = true
|
courses_list.txt
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Calculus 1
|
2 |
+
Calculus 2
|
3 |
+
Calculus 3
|
4 |
+
Chemistry Laboratory
|
5 |
+
Chemistry for Engineers
|
6 |
+
Critical Thinking
|
7 |
+
History of Vietnamese Communist Party
|
8 |
+
Internship
|
9 |
+
Philosophy of Marxism and Leninism
|
10 |
+
Physics 1
|
11 |
+
Physics 2
|
12 |
+
Physics 3
|
13 |
+
Physics 3 Laboratory
|
14 |
+
Physics 4
|
15 |
+
Political economics of Marxism and Leninism
|
16 |
+
Principles of Database Management
|
17 |
+
Principles of Marxism
|
18 |
+
Principles of Programming Languages
|
19 |
+
Probability, Statistic & Random Process
|
20 |
+
Regression Analysis
|
21 |
+
Revolutionary Lines of Vietnamese Communist Party
|
22 |
+
Scientific socialism
|
23 |
+
Speaking AE2
|
24 |
+
Special Study of the Field
|
25 |
+
Thesis
|
26 |
+
Writing AE1
|
27 |
+
Writing AE2
|
28 |
+
Intensive English 0- Twinning Program
|
29 |
+
Intensive English 01- Twinning Program
|
30 |
+
Intensive English 02- Twinning Program
|
31 |
+
Intensive English 03- Twinning Program
|
32 |
+
Intensive English 1- Twinning Program
|
33 |
+
Intensive English 2- Twinning Program
|
34 |
+
Intensive English 3- Twinning Program
|
35 |
+
Listening & Speaking IE1
|
36 |
+
Listening & Speaking IE2
|
37 |
+
Listening & Speaking IE2 (for twinning program)
|
38 |
+
Physical Training 1
|
39 |
+
Physical Training 2
|
40 |
+
Reading & Writing IE1
|
41 |
+
Reading & Writing IE2
|
42 |
+
Reading & Writing IE2 (for twinning program)
|
dataScore.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
function.py
ADDED
@@ -0,0 +1,253 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
import plotly.express as px
|
4 |
+
import plotly.graph_objs as go
|
5 |
+
import streamlit as st
|
6 |
+
import joblib
|
7 |
+
|
8 |
+
|
9 |
+
def get_year(student_id):
    """Extract the two-digit enrollment year embedded in a student ID.

    The digits at positions 6-7 of the ID encode the year,
    e.g. "ITITIU19001" -> 19.
    """
    year_digits = student_id[6:8]
    return int(year_digits)
|
11 |
+
|
12 |
+
def process_data(raw_data):
    """Pivot long-format score records into one row per student.

    Returns a wide DataFrame with one column per course plus the derived
    columns 'MaSV_school', 'Major' and 'Year' (the student-ID column
    itself is dropped at the end).

    NOTE(review): the column names are Vietnamese school-record fields —
    MaSV appears to be the student ID, TenMH the course name, DiemHP the
    course score and XepLoaiNH the yearly classification; confirm
    against the data source.
    """
    # Pivot: one row per student (MaSV), one column per course (TenMH),
    # keeping the first recorded score (DiemHP) per pair.
    pivot_df = pd.pivot_table(raw_data, values='DiemHP', index='MaSV', columns='TenMH', aggfunc='first')
    pivot_df = pivot_df.reset_index().rename_axis(None, axis=1)
    pivot_df.columns.name = None
    # Keep only courses with at least 50 non-missing scores.
    pivot_df = pivot_df.dropna(thresh=50, axis=1)
    pivot_df = pivot_df.rename(columns=lambda x: x.strip())
    # Drop the courses listed in the external config file (if present).
    cols_to_drop = []
    with open('cols_to_drop.txt', 'r') as f:
        for line in f:
            cols_to_drop.append(str(line.strip()))
    existing_cols = [col for col in cols_to_drop if col in pivot_df.columns]
    if existing_cols:
        pivot_df.drop(existing_cols, axis=1, inplace=True)

    # Attach the XepLoaiNH column, keeping each student's last record.
    df = pd.merge(pivot_df, raw_data[['MaSV', 'XepLoaiNH']], on='MaSV')
    df.drop_duplicates(subset='MaSV', keep='last', inplace=True)
    # Stash the IDs so the numeric conversion below can skip them.
    dfid=df['MaSV']
    df.drop(['MaSV', 'XepLoaiNH'], axis=1, inplace=True)
    # 'WH', 'VT' and 'I' are non-numeric grade markers; treat as missing.
    df.replace(['WH', 'VT',"I"], np.nan, inplace=True)
    df.iloc[:, :-1] = df.iloc[:, :-1].apply(pd.to_numeric)
    # Re-attach the IDs by index (row positions were preserved above).
    df = pd.merge(dfid,df,left_index=True, right_index=True)
    # Derive school code, major code and enrollment year from the ID.
    df['MaSV_school'] = df['MaSV'].str.slice(2, 4)
    df['Major'] = df['MaSV'].str.slice(0, 2)
    df["Year"] = 2000 + df["MaSV"].apply(get_year)
    df["Year"]=df["Year"].astype(str)
    df=df.drop(columns='MaSV')

    return df
|
43 |
+
|
44 |
+
def process_data_per(raw_data):
    """Pivot raw score records into one row per student, keeping the ID.

    Like ``process_data`` but retains the 'MaSV' column, derives no
    school/major/year columns, and only maps 'WH' to NaN.
    """
    # Pivot: one row per student, one column per course, first score kept.
    pivot_df = pd.pivot_table(raw_data, values='DiemHP', index='MaSV', columns='TenMH', aggfunc='first')
    pivot_df = pivot_df.reset_index().rename_axis(None, axis=1)
    pivot_df.columns.name = None
    # Keep only courses with at least 50 non-missing scores.
    pivot_df = pivot_df.dropna(thresh=50, axis=1)
    pivot_df = pivot_df.rename(columns=lambda x: x.strip())

    # Drop the courses listed in the external config file (if present).
    cols_to_drop = []
    with open('cols_to_drop.txt', 'r') as f:
        for line in f:
            cols_to_drop.append(str(line.strip()))
    existing_cols = [col for col in cols_to_drop if col in pivot_df.columns]
    if existing_cols:
        pivot_df.drop(existing_cols, axis=1, inplace=True)
    # 'WH' marks a withheld score; treat as missing before the numeric cast.
    pivot_df.replace('WH', np.nan, inplace=True)
    pivot_df.iloc[:, 1:] = pivot_df.iloc[:, 1:].apply(pd.to_numeric)
    # Attach XepLoaiNH only to deduplicate per student, then discard it.
    df = pd.merge(pivot_df, raw_data[['MaSV', 'XepLoaiNH']], on='MaSV')
    df.drop_duplicates(subset='MaSV', keep='last', inplace=True)
    df.drop(['XepLoaiNH'], axis=1, inplace=True)

    return df
|
68 |
+
|
69 |
+
|
70 |
+
def process_predict_data(raw_data):
    """Build the per-student feature table for the lateness/semester models.

    Returns one row per student with: GPA (from DTBTKH4), median credits
    per term (Mean_Cre), counts of repeated IT / non-IT courses, and the
    number of distinct terms containing English courses (EPeriod).
    """
    # Latest cumulative GPA (DTBTKH4) per student.
    dtk = raw_data[["MaSV", "DTBTKH4"]].copy()
    dtk.drop_duplicates(subset="MaSV", keep="last", inplace=True)

    # How many times each student took each course (MaMH).
    count_duplicates = raw_data.groupby(["MaSV", "MaMH"]).size().reset_index(name="Times")
    courses = raw_data[raw_data['MaMH'].str.startswith('IT')]
    courses_list=courses['MaMH'].unique().tolist()

    # Taking a course twice or more is treated as a fail/retake; split the
    # counts into IT courses (courses_list) vs everything else.
    count_duplicates["fail_courses_list"] = (
        (count_duplicates["MaMH"].isin(courses_list)) & (count_duplicates["Times"] >= 2)
    ).astype(int)

    count_duplicates["fail_not_courses_list"] = (
        (~count_duplicates["MaMH"].isin(courses_list)) & (count_duplicates["Times"] >= 2)
    ).astype(int)

    # NOTE(review): 'pass_courses' is computed but never aggregated or
    # returned below.
    count_duplicates["pass_courses"] = (
        (~count_duplicates["MaMH"].isin(courses_list)) & (count_duplicates["Times"] == 1)
    ).astype(int)

    # Sum the retake flags per student.
    fail = (
        count_duplicates.groupby("MaSV")[["fail_courses_list", "fail_not_courses_list"]]
        .sum()
        .reset_index()
    )

    # Rename to reflect the IT / non-IT split.
    fail.columns = ["MaSV", "fail_courses_list_count", "fail_not_courses_list_count"]

    df = pd.merge(dtk, fail, on="MaSV")
    df = df.rename(columns={"DTBTKH4": "GPA"})

    # Median credits earned per term (SoTCDat), reported as Mean_Cre.
    data = raw_data[['MaSV','NHHK','SoTCDat']]
    data = data.drop_duplicates()
    data = data.groupby(['MaSV'])['SoTCDat'].median().reset_index(name='Mean_Cre').round(2)

    df = pd.merge(df, data, on='MaSV')
    df1=raw_data[['MaSV','MaMH','NHHK']]
    # English courses (EN*) excluding EN007/EN008/EN011/EN012.
    # NOTE(review): this rebinds `courses_list` from IT codes to EN codes.
    courses_list = raw_data[(raw_data['MaMH'].str.startswith('EN')) & ~(raw_data['MaMH'].str.contains('EN007|EN008|EN011|EN012'))].MaMH.tolist()
    filtered_df = df1[df1['MaMH'].isin(courses_list)]
    # Number of distinct terms (NHHK) in which the student took English.
    nhhk_counts = filtered_df.groupby('MaSV')['NHHK'].nunique().reset_index(name='EPeriod')
    df = pd.merge(df, nhhk_counts, on='MaSV', how='left').fillna(0)
    df=df[['MaSV','GPA' ,'Mean_Cre', 'fail_courses_list_count' ,'fail_not_courses_list_count' ,'EPeriod']]
    return df
|
116 |
+
|
117 |
+
def predict_late_student(test_df):
    """Predict late graduation and remaining periods for each student.

    Uses two pre-trained models: R_Late (late / not-late classifier) and
    R_Sem (remaining-period regressor). Returns the engineered feature
    table with 'Period' and 'Result' columns appended.
    """
    # Load the pre-trained models.
    model=joblib.load("model/R_Late.joblib")
    model1=joblib.load("model/R_Sem.joblib")
    # Engineer the per-student model features.
    test_dfed = process_predict_data(test_df)

    # Save the student ID column (models must not see it).
    std_id = test_dfed.iloc[:, 0]

    # Drop the student ID column.
    test_dfed = test_dfed.drop(test_dfed.columns[0], axis=1)

    # Late / not-late classification.
    prediction = model.predict(test_dfed)

    # Remaining-period regression on the same features.
    prediction1 = model1.predict(test_dfed)

    # Attach both predictions; a class of 1 means "late".
    test_dfed['Period'] = prediction1
    test_dfed['Result'] = ['late' if p == 1 else 'not late' for p in prediction]

    # Put the student ID column back at the front.
    test_dfed.insert(0, 'MaSV', std_id)

    # Every row's Period is halved; rows predicted 'late' with a
    # pre-halving Period of 9 or less are softened to 'may late'.
    # NOTE(review): the halving suggests the model predicts in
    # half-semester units — confirm with the model's training code.
    for index, row in test_dfed.iterrows():
        if row['Period'] <= 9 and row['Result'] == 'late':
            test_dfed.loc[index, 'Period'] = row['Period'] / 2
            test_dfed.loc[index, 'Result'] = 'may late'
        else:
            test_dfed.loc[index, 'Period'] = row['Period'] / 2

    return test_dfed
|
153 |
+
def predict_rank(raw_data):
    """Predict an academic-rank label for each IT student.

    Pivots IT-course scores to one row per student, aligns the columns
    with the training-time column list (column_all.txt), fills missing
    course scores with the student's cumulative GPA (DTBTK), and runs
    the pre-trained R_rank model.

    Parameters
    ----------
    raw_data : DataFrame of long-format score records (must contain
        MaSV, MaMH, TenMH, DiemHP and DTBTK columns).

    Returns
    -------
    DataFrame with columns 'MaSV' and 'Pred Rank'.
    """
    # Keep only IT students and IT courses.
    raw_data = raw_data[raw_data["MaSV"].str.startswith("IT")]
    raw_data = raw_data[raw_data['MaMH'].str.startswith('IT')]
    # Pivot: one row per student, one column per course, first score kept.
    pivot_df = pd.pivot_table(
        raw_data, values="DiemHP", index="MaSV", columns="TenMH", aggfunc="first"
    )
    pivot_df = pivot_df.reset_index().rename_axis(None, axis=1)
    pivot_df.columns.name = None
    # Keep only courses with at least 50 non-missing scores.
    pivot_df = pivot_df.dropna(thresh=50, axis=1)
    pivot_df = pivot_df.rename(columns=lambda x: x.strip())

    # 'WH' marks a withheld score; treat as missing before the numeric cast.
    pivot_df.replace("WH", np.nan, inplace=True)
    pivot_df.iloc[:, 1:] = pivot_df.iloc[:, 1:].apply(pd.to_numeric)

    # Attach the cumulative GPA, keeping each student's last record.
    df = pd.merge(pivot_df, raw_data[["MaSV", "DTBTK"]], on="MaSV")
    df.drop_duplicates(subset="MaSV", keep="last", inplace=True)
    # (fixed: removed a dead `col = df.drop(...)` assignment that was never
    # used and shadowed the `col` loop variable below.)

    # Column list the model was trained on.
    columns_data = []
    with open('column_all.txt', 'r') as f:
        for line in f:
            columns_data.append(str(line.strip()))

    # Outer-merge with an empty frame holding the training columns so any
    # course absent from this dataset still exists (as NaN).
    r=df.drop(columns=['MaSV','DTBTK'])
    merge=r.columns.tolist()
    dup=pd.DataFrame(columns=columns_data)
    df= pd.merge(dup, df, on=merge, how='outer')
    # Fill any missing course score with the student's overall GPA.
    for col in df.columns:
        if df[col].isnull().values.any():
            df[col].fillna(value=df["DTBTK"], inplace=True)
    std_id = df['MaSV'].copy()
    df=df.drop(['MaSV', 'DTBTK'], axis=1)
    # The model expects its features in alphabetical column order.
    df.sort_index(axis=1, inplace=True)
    model=joblib.load("model/R_rank.joblib")
    prediction = model.predict(df)
    df['Pred Rank'] = prediction
    df.insert(0, 'MaSV', std_id)
    df=df[['MaSV','Pred Rank']]
    return df
|
195 |
+
|
196 |
+
|
197 |
+
def predict_one_student(raw_data, student_id):
    """Render per-student charts in Streamlit.

    Draws a histogram of the student's course scores and a score-by-course
    bar chart side by side; writes a "no data" message when the student ID
    is not found.
    """
    # Subset the pivoted score table to this student's row.
    student = process_data_per(raw_data)
    filtered_df = student[student["MaSV"] == student_id]
    if len(filtered_df) > 0:
        # All of the student's course scores, missing values dropped.
        selected_row = filtered_df.iloc[0, 1:].dropna()
        # NOTE(review): `colname` is computed but never used below.
        colname = filtered_df.dropna().columns.tolist()
        values = selected_row.values.tolist()

        # Histogram of the student's score distribution.
        fig1 = go.Figure()
        fig1.add_trace(go.Histogram(x=values, nbinsx=40, name=student_id,marker=dict(color='rgba(50, 100, 200, 0.7)')))

        # Set the chart title and axis labels.
        fig1.update_layout(
            title="Histogram for student {}".format(student_id),
            xaxis_title="Value",
            yaxis_title="Frequency",
            width=500
        )

        # Score-by-course bar chart, colored by term.
        # NOTE(review): these assignments write into a slice of raw_data
        # (pandas chained-assignment warning territory) — consider .copy().
        data = raw_data[['MaSV', 'NHHK', 'TenMH', 'DiemHP']]
        data['TenMH'] = data['TenMH'].str.lstrip()
        # Format the term code, e.g. 20211 -> "2021 S 1".
        data['NHHK'] = data['NHHK'].apply(lambda x: str(x)[:4] + ' S ' + str(x)[4:])
        # Courses excluded from the bar chart (English/PE etc.).
        rows_to_drop = []
        with open('rows_to_drop.txt', 'r') as f:
            for line in f:
                rows_to_drop.append(str(line.strip()))
        data = data[~data['TenMH'].isin(rows_to_drop)]
        student_data = data[data['MaSV'] == student_id][['NHHK', 'TenMH', 'DiemHP']]
        student_data['DiemHP'] = pd.to_numeric(student_data['DiemHP'], errors='coerce')

        fig2 = px.bar(student_data, x='TenMH', y='DiemHP', color='NHHK', title='Student Score vs. Course')
        fig2.update_layout(
            title="Student Score vs. Course",
            xaxis_title=None,
            yaxis_title="Score",
        )
        # Horizontal reference line at score 50 (presumably the pass
        # mark — TODO confirm).
        fig2.add_shape(
            type="line",
            x0=0,
            y0=50,
            x1=len(student_data['TenMH'])-1,
            y1=50,
            line=dict(color='red', width=3)
        )

        # Display the two charts side by side.
        col1, col2 = st.columns(2)
        with col1:
            st.plotly_chart(fig1)

        with col2:
            st.plotly_chart(fig2)
    else:
        st.write("No data found for student {}".format(student_id))
|
main.py
ADDED
@@ -0,0 +1,310 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import streamlit as st
|
3 |
+
import plotly.express as px
|
4 |
+
import numpy as np
|
5 |
+
import plotly.graph_objs as go
|
6 |
+
from function import process_data,predict_late_student, predict_rank,predict_one_student
|
7 |
+
from datetime import datetime
|
8 |
+
from PIL import Image
|
9 |
+
import base64
|
10 |
+
from io import BytesIO
|
11 |
+
|
12 |
+
|
13 |
+
df = pd.DataFrame()
|
14 |
+
|
15 |
+
|
16 |
+
def color_cell(val):
    """Map a lateness verdict to a CSS color declaration.

    Used with ``DataFrame.style.applymap``: 'not late' -> green,
    'may late' -> yellow, 'late' -> red, anything else -> black.
    """
    verdict_colors = {
        "not late": "green",
        "may late": "yellow",
        "late": "red",
    }
    return "color: %s" % verdict_colors.get(val, "black")
|
26 |
+
|
27 |
+
|
28 |
+
def get_year(student_id):
    """Return the two-digit enrollment year stored at positions 6-7 of
    a student ID (e.g. "ITITIU19001" -> 19)."""
    return int(student_id[6:8])
|
30 |
+
|
31 |
+
|
32 |
+
def generate_comment(median, course=None):
    """Return a human-readable remark about a course's median score.

    Parameters
    ----------
    median : numeric median score; tiers are <30, <50, <80, else.
    course : course name inserted into the message. Defaults to the
        module-level ``course`` global selected in the dashboard, so
        existing single-argument call sites keep working. (fixed: the
        original silently depended on that global.)

    Returns
    -------
    str : the formatted comment.
    """
    if course is None:
        # Backward-compatible fallback to the dashboard's selected course.
        course = globals().get("course", "this course")
    if median < 30:
        comment = f"The median score for {course} is quite low at {median}. Students may need to work harder to improve their performance."
    elif median < 50:
        comment = f"The median score for {course} is below average at {median}. Students should work on improving their understanding of the material."
    elif median < 80:
        comment = f"The median score for {course} is solid at {median}. Students are making good progress but could still work on improving their skills."
    else:
        comment = f"The median score for {course} is outstanding at {median}. Students are doing an excellent job in this course."
    return comment
|
42 |
+
|
43 |
+
# ---- Page setup: favicon, layout, header, data load, sidebar ----
favicon = 'R.png'

st.set_page_config(
    page_title='Student System',
    page_icon=favicon,
    layout='wide',
)
currentYear = datetime.now().year
im1 = Image.open("R.png")

# Header: logo in the narrow left column, title in the wide right column.
col1, col2 = st.columns([1, 3])

with col1:
    st.image(im1, width=150)

with col2:
    st.title("Student Performance Prediction System")


# Load the raw data.
# (File-upload path kept for reference; the app currently reads the
# bundled CSV directly.)
# uploaded_file = st.file_uploader("Choose a score file", type=["xlsx", "csv"])

# if uploaded_file is not None:
#     file_contents = uploaded_file.read()
#     file_ext = uploaded_file.name.split(".")[-1].lower()  # Get the file extension

#     if file_ext == "csv":
#         df = pd.read_csv(BytesIO(file_contents))
#     elif file_ext in ["xls", "xlsx"]:
#         df = pd.read_excel(BytesIO(file_contents))
#     else:
#         st.error("Invalid file format. Please upload a CSV or Excel file.")

# raw_data = df.copy()
# fixed: the file shipped with this commit is "All_maj.csv"
# (see .gitattributes LFS entry); "All_major.csv" does not exist.
raw_data = pd.read_csv("All_maj.csv")
st.sidebar.title("Analysis Tool")

option = ["Dashboard", "Predict"]
# Sidebar switch between the two app modes.
tabs = st.sidebar.selectbox("Select an option", option)
|
90 |
+
|
91 |
+
|
92 |
+
# draw histogram
|
93 |
+
# Streamlit app
|
94 |
+
# ---- Dashboard mode: filter by major/school/year, then chart one course ----
if tabs == "Dashboard":
    # try:

    df = process_data(raw_data)
    unique_values_major = df["Major"].unique()
    # NOTE(review): no "All" entry is prepended here, yet the branch
    # below tests for it — the major filter can never be "All".
    major=st.selectbox("Select a major:", unique_values_major)
    if major == "All":
        # If so, display the entire DataFrame
        filtered_df = df.copy()
    else:
        # Otherwise, filter the DataFrame based on the selected value
        filtered_df = df[df["Major"] == major]
        filtered_df = filtered_df.dropna(axis=1, how="all")

    # School filter (with an explicit "All" option).
    df=filtered_df
    unique_values = df["MaSV_school"].unique()
    all_values = np.concatenate([["All"],unique_values ])
    school = st.selectbox("Select a school:", all_values)
    if school == "All":
        # If so, display the entire DataFrame
        filtered_df = df.copy()
    else:
        # Otherwise, filter the DataFrame based on the selected value
        filtered_df = df[df["MaSV_school"] == school]
        filtered_df = filtered_df.dropna(axis=1, how="all")

    # Year filter (with an explicit "All" option).
    df=filtered_df
    unique_values_year = df["Year"].unique()
    all_values_year = np.concatenate([["All"],unique_values_year ])
    year = st.selectbox("Select a year:", all_values_year)

    if year == "All":
        # If so, display the entire DataFrame
        filtered_df = df.copy()
    else:
        # Otherwise, filter the DataFrame based on the selected value
        filtered_df = df[df["Year"] == year]
        filtered_df = filtered_df.dropna(axis=1, how="all")


    df=filtered_df

    # The last three columns are MaSV_school/Major/Year, not courses.
    options = df.columns[:-3]
    course = st.selectbox("Select a course:", options)

    # Scores for the selected course only.
    course_data = df[course].dropna()

    # Median-based narrative comment (generate_comment reads the global
    # `course` set above).
    st.write(generate_comment(course_data.median()))

    st.write("Course:", course, " of ", school," student" )


    col1, col2,col3= st.columns(3)

    # Column 1: histogram of the course's scores.
    with col1:
        fig = go.Figure()
        fig.add_trace(
            go.Histogram(
                x=course_data, nbinsx=40, name="Histogram"
            )
        )
        fig.update_layout(
            title="Histogram of Scores for {}".format(course),
            xaxis_title="Score",
            yaxis_title="Count",
            height=400,
            width=400
        )
        st.plotly_chart(fig)

    # Column 2: box plot of the same scores.
    with col2:
        fig = go.Figure()
        fig.add_trace(
            go.Box(
                y=course_data, name="Box plot"
            )
        )
        fig.update_layout(
            title="Box plot of Scores for {}".format(course),
            yaxis_title="Score",
            height=400,
            width=400
        )
        st.plotly_chart(fig)
    # Column 3: mean score per term, computed from the raw (long) data.
    with col3:
        raw_data['MaSV_school'] = raw_data['MaSV'].str.slice(2, 4)
        if school == "All":
            # If so, display the entire DataFrame
            data = raw_data.copy()
        else:
            # Otherwise, filter the DataFrame based on the selected value
            data = raw_data[raw_data["MaSV_school"] == school]
        df1=data[['TenMH','NHHK','DiemHP']].copy()
        df1['DiemHP'] = pd.to_numeric(df1['DiemHP'], errors='coerce')
        # Format the term code, e.g. 20211 -> "2021 S 1".
        df1['NHHK'] = df1['NHHK'].apply(lambda x: str(x)[:4] + ' S ' + str(x)[4:])
        # Raw TenMH values carry a leading space; re-add it to match.
        selected_TenMH = " " + course
        filtered_df1 = df1[df1['TenMH'] == selected_TenMH]
        mean_DiemHP = filtered_df1.groupby('NHHK')['DiemHP'].mean().round(1).reset_index(name='Mean')
        # NOTE(review): "for{...} thought period" in this runtime title is
        # missing a space and likely means "through"; left as-is since it
        # is user-visible output, not a comment.
        fig = px.line(mean_DiemHP, x='NHHK', y='Mean', title=f"Mean DiemHP for{selected_TenMH} thought period")
        fig.update_layout(
            height=400,
            width=400)
        st.plotly_chart(fig)


    # except:
    #     st.write("Add CSV to analysis")
|
209 |
+
|
210 |
+
|
211 |
+
# predict student
|
212 |
+
|
213 |
+
# ---- Predict mode: run both models and show per-student / per-year views ----
elif tabs == "Predict":
    try:
        raw_data = pd.read_csv("dataScore.csv")
        predict = predict_late_student(raw_data)
        rank = predict_rank(raw_data)

        predict = pd.merge(predict, rank, on="MaSV")
        # Translate the model's Vietnamese rank labels to English.
        rank_mapping = {
            "Khá": "Good",
            "Trung Bình Khá": "Average good",
            "Giỏi": "Very good",
            "Kém": "Very weak",
            "Trung Bình": "Ordinary",
            "Yếu": "Weak",
            "Xuất Sắc": "Excellent",
        }
        predict["Pred Rank"].replace(rank_mapping, inplace=True)

        # Filter students who have a Result value of "late"
        df_late = predict

        MaSV = st.text_input("Enter Student ID:")
        if MaSV:
            # Single-student view: styled row plus detail charts.
            df_filtered = predict[predict["MaSV"] == MaSV]
            styled_table = (
                df_filtered[["MaSV", "GPA", "Mean_Cre", "Pred Rank", "Result", "Period"]]
                .style.applymap(color_cell)
                .format({"GPA": "{:.2f}", "Mean_Cre": "{:.1f}", "Period": "{:.1f}"})
            )

            with st.container():
                st.write(styled_table)
                predict_one_student(raw_data,MaSV)
        else:
            # Cohort view: filter by enrollment year, excluding the two
            # most recent intakes (too early to predict lateness).
            df_late = predict
            # df_late = predict[(predict['Pred Rank'] == 'Yếu') | (predict['Pred Rank'] == 'Kém')]
            df_late["Year"] = 2000 + df_late["MaSV"].apply(get_year)
            df_late = df_late[
                (df_late["Year"] != currentYear - 1) & (df_late["Year"] != currentYear - 2)
            ]
            year = st.selectbox("Select Year", options=df_late["Year"].unique())
            df_filtered = df_late[df_late["Year"] == year]
            styled_table = (
                df_filtered[["MaSV", "GPA", "Mean_Cre", "Pred Rank", "Result", "Period"]]
                .style.applymap(color_cell)
                .format({"GPA": "{:.2f}", "Mean_Cre": "{:.2f}", "Period": "{:.2f}"})
            )
            # Offer the filtered table as a base64 CSV download link.
            # NOTE(review): the download filename "Preidct data.csv" is a
            # typo for "Predict"; it is a runtime string, left unchanged.
            csv = df_filtered.to_csv(index=False)
            b64 = base64.b64encode(csv.encode()).decode()
            href = f'<a href="data:file/csv;base64,{b64}" download="Preidct data.csv">Download CSV</a>'
            st.markdown(href, unsafe_allow_html=True)
            # Pie charts of predicted rank and lateness result.
            fig1 = px.pie(
                df_filtered,
                names="Pred Rank",
                title="Pred Rank",
                color_discrete_sequence=px.colors.sequential.Mint,
                height=400,
                width=400,
            )
            fig2 = px.pie(
                df_filtered,
                names="Result",
                title="Result",
                color_discrete_sequence=px.colors.sequential.Peach,
                height=400,
                width=400,
            )
            fig1.update_layout(
                title={
                    "text": "Pred Rank",
                    "y": 0.95,
                    "x": 0.5,
                    "xanchor": "center",
                    "yanchor": "top",
                }
            )
            fig2.update_layout(
                title={
                    "text": "Result",
                    "y": 0.95,
                    "x": 0.5,
                    "xanchor": "center",
                    "yanchor": "top",
                }
            )
            st.dataframe(styled_table)
            col1, col2 = st.columns([1, 1])
            with col1:
                st.plotly_chart(fig1)
            with col2:
                st.plotly_chart(fig2)



    # display the grid of pie charts using Streamlit

    # NOTE(review): this bare `except:` swallows every error (including
    # missing model files and KeyboardInterrupt) behind a generic
    # message — consider catching FileNotFoundError/KeyError explicitly.
    except:
        st.write('Add CSV to analysis')
|
model/R_Late.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d201e84514a400d73d79097b43fabf12cc96923e7abb1bc5c3be22bc5dea7445
|
3 |
+
size 497289
|
model/R_Sem.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:adfbee871506a3a7e6e3ca02d7bd205cceab50fdbec47878d01773ed59dd5e7c
|
3 |
+
size 2638353
|
model/R_rank.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89b2a11b69b622db9e2401c735d0bb0b4a5f791269de30c9799a0817f619cd96
|
3 |
+
size 205089
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
numpy
|
2 |
+
Cython==0.29.21
|
3 |
+
scikit-learn
|
4 |
+
pandas
|
5 |
+
plotly
|
6 |
+
scipy
|
7 |
+
pyDOE
|
8 |
+
openpyxl
|
rows_to_drop.txt
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Intensive English 0- Twinning Program
|
2 |
+
Intensive English 01- Twinning Program
|
3 |
+
Intensive English 02- Twinning Program
|
4 |
+
Intensive English 03- Twinning Program
|
5 |
+
Intensive English 1- Twinning Program
|
6 |
+
Intensive English 2- Twinning Program
|
7 |
+
Intensive English 3- Twinning Program
|
8 |
+
Listening & Speaking IE1
|
9 |
+
Listening & Speaking IE2
|
10 |
+
Listening & Speaking IE2 (for twinning program)
|
11 |
+
Physical Training 1
|
12 |
+
Physical Training 2
|
13 |
+
Reading & Writing IE1
|
14 |
+
Reading & Writing IE2
|
15 |
+
Reading & Writing IE2 (for twinning program)
|