Update app.py
Browse files
app.py
CHANGED
@@ -1,92 +1,75 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
import gradio as gr
|
5 |
import joblib
|
6 |
import numpy as np
|
7 |
import pandas as pd
|
8 |
from huggingface_hub import hf_hub_download
|
9 |
-
from sklearn.preprocessing import LabelEncoder
|
10 |
|
11 |
# Load the trained model and scaler objects from file
|
12 |
-
REPO_ID = "Hemg/modelxxx"
|
13 |
-
MODEL_FILENAME = "predjob.joblib"
|
14 |
-
SCALER_FILENAME = "scalejob.joblib"
|
15 |
|
16 |
model = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME))
|
17 |
scaler = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME))
|
18 |
|
19 |
def encode_categorical_columns(df):
|
20 |
-
# Create a copy of the DataFrame
|
21 |
-
df_encoded = df.copy()
|
22 |
-
|
23 |
-
# Convert Yes/No to 1/0 for binary columns
|
24 |
-
binary_columns = ['Visited Parent', 'Visited College for Inquiry', 'Attended Any Event', 'College']
|
25 |
-
for col in binary_columns:
|
26 |
-
df_encoded[col] = df_encoded[col].map({'Yes': 1, 'No': 0}).astype(float)
|
27 |
-
|
28 |
-
# Encode other categorical columns
|
29 |
-
categorical_columns = ['Location', 'Course', 'Faculty', 'Source', 'Event', 'Presenter']
|
30 |
label_encoder = LabelEncoder()
|
31 |
-
for col in categorical_columns:
|
32 |
-
df_encoded[col] = label_encoder.fit_transform(df_encoded[col]).astype(float)
|
33 |
|
34 |
-
#
|
35 |
-
|
36 |
-
for col in numerical_columns:
|
37 |
-
df_encoded[col] = df_encoded[col].astype(float)
|
38 |
|
39 |
-
|
|
|
|
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
input_data = {
|
44 |
-
'Location': [Location],
|
45 |
-
'Course': [Course],
|
46 |
-
'College': [College],
|
47 |
-
'Faculty': [Faculty],
|
48 |
-
'Source': [Source],
|
49 |
-
'Event': [Event],
|
50 |
-
'Presenter': [Presenter],
|
51 |
-
'Visited Parent': [Visited_Parent],
|
52 |
-
'Visited College for Inquiry': [Visited_College_for_Inquiry],
|
53 |
-
'Attended Any Event': [Attended_Any_Event],
|
54 |
-
'College Fee': [float(College_Fee)],
|
55 |
-
'GPA': [float(GPA)],
|
56 |
-
'Year': [float(Year)]
|
57 |
-
}
|
58 |
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
-
print
|
|
|
62 |
print(input_df)
|
63 |
|
64 |
-
# Encode
|
65 |
-
|
66 |
|
67 |
-
print
|
68 |
-
print(
|
|
|
69 |
|
70 |
-
#
|
71 |
-
|
72 |
-
encoded_df['College Fee'] = (encoded_df['College Fee'] - 1000000) / (1700000 - 1000000)
|
73 |
|
74 |
-
#
|
75 |
-
|
76 |
|
77 |
-
#
|
78 |
-
encoded_df['GPA'] = (encoded_df['GPA'] - 2.0) / (3.0 - 2.0)
|
79 |
-
|
80 |
-
print("\nNormalized DataFrame:")
|
81 |
-
print(encoded_df)
|
82 |
-
|
83 |
-
# Make prediction
|
84 |
-
prediction = model.predict(encoded_df.astype(float))[0]
|
85 |
-
|
86 |
-
# Clip prediction between 0 and 1
|
87 |
prediction = np.clip(prediction, 0, 1)
|
88 |
|
89 |
-
|
|
|
|
|
|
|
|
|
90 |
|
91 |
return f"Chance of Admission: {prediction:.1f}"
|
92 |
|
@@ -95,21 +78,25 @@ iface = gr.Interface(
|
|
95 |
fn=predict_performance,
|
96 |
inputs=[
|
97 |
gr.Radio(["Kathmandu", "Bhaktapur", "Lalitpur", "Kritipur"], label="Location"),
|
98 |
-
gr.Radio(["MSc IT & Applied Security", "BSc (Hons) Computing",
|
99 |
-
"BSc (Hons)
|
100 |
-
"
|
101 |
-
|
|
|
|
|
102 |
gr.Radio(["Science", "Management", "Humanities"], label="Faculty"),
|
103 |
gr.Radio(["Event", "Facebook", "Instagram", "Offline", "Recommendation"], label="Source"),
|
104 |
-
gr.Radio(["New Year", "Dashain", "Orientation", "Fresher's Party",
|
105 |
-
label="Event"),
|
106 |
-
gr.Radio(["Ram", "Gita", "Manish", "Shyam", "Raj", "Hari", "Rina", "Shree"],
|
|
|
107 |
gr.Radio(["Yes", "No"], label="Visited Parent"),
|
|
|
108 |
gr.Radio(["Yes", "No"], label="Visited College for Inquiry"),
|
|
|
|
|
109 |
gr.Radio(["Yes", "No"], label="Attended Any Event"),
|
110 |
-
gr.
|
111 |
-
gr.Slider(minimum=2.0, maximum=3.0, step=0.1, label="GPA"),
|
112 |
-
gr.Slider(minimum=2019, maximum=2025, step=1, label="Year")
|
113 |
],
|
114 |
outputs="text",
|
115 |
title="Chance of Student Admission",
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import joblib
|
3 |
import numpy as np
|
4 |
import pandas as pd
|
5 |
from huggingface_hub import hf_hub_download
|
6 |
+
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
|
7 |
|
8 |
# Hugging Face Hub coordinates of the serialized estimator and its scaler.
REPO_ID = "Hemg/modelxxx"  # hugging face repo ID
MODEL_FILENAME = "predjob.joblib"  # model file name
SCALER_FILENAME = "scalejob.joblib"  # scaler file name


def _fetch_artifact(filename):
    """Download *filename* from REPO_ID on the Hugging Face Hub and load it.

    NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    only point REPO_ID at a repository you trust.
    """
    local_path = hf_hub_download(repo_id=REPO_ID, filename=filename)
    return joblib.load(local_path)


# Both objects are loaded once, at import time, and reused by every request.
model = _fetch_artifact(MODEL_FILENAME)
scaler = _fetch_artifact(SCALER_FILENAME)
|
15 |
|
16 |
def encode_categorical_columns(df):
    """Return a copy of *df* with every object-dtype column label-encoded.

    Each object-dtype column is replaced by the integer codes produced by
    ``LabelEncoder.fit_transform``; all other columns are passed through
    unchanged. The input frame is not modified.

    Args:
        df: pandas DataFrame holding the raw feature values.

    Returns:
        A new DataFrame with the same columns, in the same order, where the
        former object-dtype columns contain integer codes.

    NOTE(review): the encoder is re-fitted on the values present in *df* on
    every call, so the codes depend only on this frame. For a single-row
    frame every encoded column becomes 0, whatever the category was. If the
    model was trained with encoders fitted on the full training set, those
    fitted encoders should be persisted and loaded here instead -- confirm
    against the training pipeline.
    """
    # Work on a copy so the caller's frame keeps its original string values.
    encoded = df.copy()

    label_encoder = LabelEncoder()
    for col in encoded.select_dtypes(include=['object']).columns:
        encoded[col] = label_encoder.fit_transform(encoded[col])

    # The previous version followed this with a one-hot step over
    # "object columns minus the columns just encoded". That set is always
    # empty, so the step never did anything and has been removed.
    return encoded
|
33 |
+
def predict_performance(Location, Course, Faculty, College, Source, Event, Presenter, Visited_Parent,
                        Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year):
    """Predict the chance of admission for one set of form inputs.

    The thirteen arguments are assembled into a one-row DataFrame, encoded
    with ``encode_categorical_columns``, scaled with the module-level
    ``scaler`` and passed to the module-level ``model``.

    Args:
        Location, Course, Faculty, College, Source, Event, Presenter:
            categorical form values.
        Visited_Parent, Visited_College_for_Inquiry, Attended_Any_Event:
            "Yes"/"No" style form values.
        College_Fee, GPA, Year: numeric form values.

    Returns:
        A string of the form ``"Chance of Admission: 0.7"``, where the number
        is the model output clipped to [0, 1] and shown with one decimal.

    NOTE(review): Gradio passes component values positionally, so the order
    and number of the Interface ``inputs`` must match this signature --
    verify against the ``gr.Interface`` definition.
    """
    # Column names use spaces rather than underscores; per the original
    # comment this is meant to match the training data. Pairing each name
    # with its value here keeps the two from drifting apart: the previous
    # positional list left out College, so 12 values met 13 column names and
    # DataFrame construction raised ValueError on every call.
    input_data = {
        "Location": Location,
        "Course": Course,
        "Faculty": Faculty,
        "College": College,
        "Source": Source,
        "Event": Event,
        "Presenter": Presenter,
        "Visited Parent": Visited_Parent,
        "Visited College for Inquiry": Visited_College_for_Inquiry,
        "Attended Any Event": Attended_Any_Event,
        "College Fee": College_Fee,
        "GPA": GPA,
        "Year": Year,
    }
    input_df = pd.DataFrame([input_data])

    # Debug print: show DataFrame before encoding
    print("\nDataFrame before encoding:")
    print(input_df)

    # Encode categorical columns
    df = encode_categorical_columns(input_df)

    # Debug print: show DataFrame after encoding
    print("\nDataFrame after encoding:")
    print(df)

    # Scale input data using the loaded scaler
    scaled_input = scaler.transform(df)

    # Make the prediction; only the first (and only) row's output is used.
    raw_prediction = model.predict(scaled_input)[0]

    # Clip the prediction to be between 0 and 1
    prediction = np.clip(raw_prediction, 0, 1)

    # Debug print: report the value before clipping, as the label says.
    print("\nPrediction details:")
    print(f"Raw prediction: {raw_prediction}")

    return f"Chance of Admission: {prediction:.1f}"
|
75 |
|
|
|
78 |
fn=predict_performance,
|
79 |
inputs=[
|
80 |
gr.Radio(["Kathmandu", "Bhaktapur", "Lalitpur", "Kritipur"], label="Location"),
|
81 |
+
gr.Radio(["MSc IT & Applied Security", "BSc (Hons) Computing",
|
82 |
+
"BSc (Hons) Computing with Artificial Intelligence",
|
83 |
+
"BSc (Hons) Computer Networking & IT Security",
|
84 |
+
"BSc (Hons) Multimedia Technologies", "MBA",
|
85 |
+
"BA (Hons) Accounting & Finance",
|
86 |
+
"BA (Hons) Business Administration"], label="Course"),
|
87 |
gr.Radio(["Science", "Management", "Humanities"], label="Faculty"),
|
88 |
gr.Radio(["Event", "Facebook", "Instagram", "Offline", "Recommendation"], label="Source"),
|
89 |
+
gr.Radio(["New Year", "Dashain", "Orientation", "Fresher's Party",
|
90 |
+
"Holi Festival", "Welcome Ceremony"], label="Event"),
|
91 |
+
gr.Radio(["Ram", "Gita", "Manish", "Shyam", "Raj", "Hari", "Rina", "Shree"],
|
92 |
+
label="Presenter"),
|
93 |
gr.Radio(["Yes", "No"], label="Visited Parent"),
|
94 |
+
gr.Radio(["Trinity", "CCRC", "KMC", "SOS", "ISMT", "St. Xavier's", "Everest", "Prime"], label="College")
|
95 |
gr.Radio(["Yes", "No"], label="Visited College for Inquiry"),
|
96 |
+
gr.Radio(["New Year", "Dashain", "Orientation", "Fresher's Party", "Holi Festival", "Welcome Ceremony"],
|
97 |
+
label="Event"),
|
98 |
gr.Radio(["Yes", "No"], label="Attended Any Event"),
|
99 |
+
gr.Radio(["Yes", "No"], label="Visited Parent")
|
|
|
|
|
100 |
],
|
101 |
outputs="text",
|
102 |
title="Chance of Student Admission",
|