Hemg committed on
Commit
e19a98b
1 Parent(s): b6911a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -73
app.py CHANGED
@@ -1,92 +1,75 @@
1
-
2
-
3
-
4
  import gradio as gr
5
  import joblib
6
  import numpy as np
7
  import pandas as pd
8
  from huggingface_hub import hf_hub_download
9
- from sklearn.preprocessing import LabelEncoder
10
 
11
  # Load the trained model and scaler objects from file
12
- REPO_ID = "Hemg/modelxxx"
13
- MODEL_FILENAME = "predjob.joblib"
14
- SCALER_FILENAME = "scalejob.joblib"
15
 
16
  model = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME))
17
  scaler = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME))
18
 
19
  def encode_categorical_columns(df):
20
- # Create a copy of the DataFrame
21
- df_encoded = df.copy()
22
-
23
- # Convert Yes/No to 1/0 for binary columns
24
- binary_columns = ['Visited Parent', 'Visited College for Inquiry', 'Attended Any Event', 'College']
25
- for col in binary_columns:
26
- df_encoded[col] = df_encoded[col].map({'Yes': 1, 'No': 0}).astype(float)
27
-
28
- # Encode other categorical columns
29
- categorical_columns = ['Location', 'Course', 'Faculty', 'Source', 'Event', 'Presenter']
30
  label_encoder = LabelEncoder()
31
- for col in categorical_columns:
32
- df_encoded[col] = label_encoder.fit_transform(df_encoded[col]).astype(float)
33
 
34
- # Ensure numerical columns are float
35
- numerical_columns = ['College Fee', 'GPA', 'Year']
36
- for col in numerical_columns:
37
- df_encoded[col] = df_encoded[col].astype(float)
38
 
39
- return df_encoded
 
 
40
 
41
def predict_performance(
    Location,
    Course,
    College,
    Faculty,
    Source,
    Event,
    Presenter,
    Visited_Parent,
    Visited_College_for_Inquiry,
    Attended_Any_Event,
    College_Fee,
    GPA,
    Year,
):
    """Predict the admission chance for one submission and format it as text.

    The arguments are assembled into a single-row DataFrame, encoded with
    ``encode_categorical_columns``, min-max scaled with fixed bounds, and fed
    to the module-level ``model``. Intermediate frames and the final value are
    printed for debugging.

    Args:
        Location, Course, College, Faculty, Source, Event, Presenter,
        Visited_Parent, Visited_College_for_Inquiry, Attended_Any_Event:
            categorical field values.
        College_Fee, GPA, Year: numeric field values; each is passed through
            ``float()``.

    Returns:
        ``"Chance of Admission: <p>"`` where ``<p>`` is the prediction clipped
        to [0, 1] and formatted with one decimal place.
    """
    # Insertion order of this mapping fixes the column order of the frame.
    row = {
        'Location': Location,
        'Course': Course,
        'College': College,
        'Faculty': Faculty,
        'Source': Source,
        'Event': Event,
        'Presenter': Presenter,
        'Visited Parent': Visited_Parent,
        'Visited College for Inquiry': Visited_College_for_Inquiry,
        'Attended Any Event': Attended_Any_Event,
        'College Fee': float(College_Fee),
        'GPA': float(GPA),
        'Year': float(Year),
    }
    input_df = pd.DataFrame({name: [value] for name, value in row.items()})

    print("\nInput DataFrame:")
    print(input_df)

    encoded_df = encode_categorical_columns(input_df)

    print("\nEncoded DataFrame:")
    print(encoded_df)

    # (column, lower bound, upper bound) for min-max scaling.
    scaling_bounds = (
        ('College Fee', 1000000, 1700000),
        ('Year', 2019, 2025),
        ('GPA', 2.0, 3.0),
    )
    for column, low, high in scaling_bounds:
        encoded_df[column] = (encoded_df[column] - low) / (high - low)

    print("\nNormalized DataFrame:")
    print(encoded_df)

    model_output = model.predict(encoded_df.astype(float))[0]
    # Keep the reported value inside [0, 1].
    prediction = np.clip(model_output, 0, 1)

    print("\nPrediction:", prediction)

    return f"Chance of Admission: {prediction:.1f}"
92
 
@@ -95,21 +78,25 @@ iface = gr.Interface(
95
  fn=predict_performance,
96
  inputs=[
97
  gr.Radio(["Kathmandu", "Bhaktapur", "Lalitpur", "Kritipur"], label="Location"),
98
- gr.Radio(["MSc IT & Applied Security", "BSc (Hons) Computing", "BSc (Hons) Computing with Artificial Intelligence",
99
- "BSc (Hons) Computer Networking & IT Security", "BSc (Hons) Multimedia Technologies", "MBA",
100
- "BA (Hons) Accounting & Finance", "BA (Hons) Business Administration"], label="Course"),
101
- gr.Radio(["Yes", "No"], label="College"),
 
 
102
  gr.Radio(["Science", "Management", "Humanities"], label="Faculty"),
103
  gr.Radio(["Event", "Facebook", "Instagram", "Offline", "Recommendation"], label="Source"),
104
- gr.Radio(["New Year", "Dashain", "Orientation", "Fresher's Party", "Holi Festival", "Welcome Ceremony"],
105
- label="Event"),
106
- gr.Radio(["Ram", "Gita", "Manish", "Shyam", "Raj", "Hari", "Rina", "Shree"], label="Presenter"),
 
107
  gr.Radio(["Yes", "No"], label="Visited Parent"),
 
108
  gr.Radio(["Yes", "No"], label="Visited College for Inquiry"),
 
 
109
  gr.Radio(["Yes", "No"], label="Attended Any Event"),
110
- gr.Slider(minimum=1000000, maximum=1700000, step=1000, label="College Fee"),
111
- gr.Slider(minimum=2.0, maximum=3.0, step=0.1, label="GPA"),
112
- gr.Slider(minimum=2019, maximum=2025, step=1, label="Year")
113
  ],
114
  outputs="text",
115
  title="Chance of Student Admission",
 
 
 
 
1
  import gradio as gr
2
  import joblib
3
  import numpy as np
4
  import pandas as pd
5
  from huggingface_hub import hf_hub_download
6
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
7
 
8
  # Load the trained model and scaler objects from file
9
+ REPO_ID = "Hemg/modelxxx" # hugging face repo ID
10
+ MODEL_FILENAME = "predjob.joblib" # model file name
11
+ SCALER_FILENAME = "scalejob.joblib" # scaler file name
12
 
13
  model = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME))
14
  scaler = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME))
15
 
16
  def encode_categorical_columns(df):
 
 
 
 
 
 
 
 
 
 
17
  label_encoder = LabelEncoder()
 
 
18
 
19
+ # Identify categorical columns
20
+ ordinal_columns = df.select_dtypes(include=['object']).columns
 
 
21
 
22
+ # Encode ordinal columns using LabelEncoder
23
+ for col in ordinal_columns:
24
+ df[col] = label_encoder.fit_transform(df[col])
25
 
26
+ # Get nominal columns for one-hot encoding
27
+ nominal_columns = df.select_dtypes(include=['object']).columns.difference(ordinal_columns)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
+ # Apply one-hot encoding to nominal columns (drop the first column to avoid multicollinearity)
30
+ df = pd.get_dummies(df, columns=nominal_columns, drop_first=True)
31
+
32
+ return df
33
def predict_performance(Location, Course, Faculty, College, Source, Event, Presenter, Visited_Parent,
                        Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year):
    """Predict the admission chance for one submission and format it as text.

    The arguments are assembled into a single-row DataFrame whose column names
    use spaces instead of underscores, encoded with
    ``encode_categorical_columns``, scaled with the module-level ``scaler`` and
    passed to the module-level ``model``. Intermediate frames and the
    prediction are printed for debugging.

    Args:
        Location, Course, Faculty, College, Source, Event, Presenter,
        Visited_Parent, Visited_College_for_Inquiry, Attended_Any_Event:
            categorical field values.
        College_Fee, GPA, Year: numeric field values; each is passed through
            ``float()``.

    Returns:
        ``"Chance of Admission: <p>"`` where ``<p>`` is the prediction clipped
        to [0, 1] and formatted with one decimal place.

    Raises:
        TypeError, ValueError: if a numeric field cannot be converted to float.
    """
    # Build the row as a name -> value mapping so a value can never be left
    # out of step with its column name. The earlier positional list omitted
    # ``College`` (12 values for 13 columns), which made DataFrame
    # construction fail on every call.
    features = {
        "Location": Location,
        "Course": Course,
        "Faculty": Faculty,
        "College": College,
        "Source": Source,
        "Event": Event,
        "Presenter": Presenter,
        "Visited Parent": Visited_Parent,
        "Visited College for Inquiry": Visited_College_for_Inquiry,
        "Attended Any Event": Attended_Any_Event,
        # Coerce to float so these columns get a numeric dtype and are not
        # picked up as object columns by the categorical encoder.
        "College Fee": float(College_Fee),
        "GPA": float(GPA),
        "Year": float(Year),
    }
    input_df = pd.DataFrame([features])

    # Debug print: show DataFrame before encoding
    print("\nDataFrame before encoding:")
    print(input_df)

    # Encode categorical columns
    df = encode_categorical_columns(input_df)

    # Debug print: show DataFrame after encoding
    print("\nDataFrame after encoding:")
    print(df)

    # Scale input data using the loaded scaler
    scaled_input = scaler.transform(df)

    # Make the prediction
    raw_prediction = model.predict(scaled_input)[0]

    # Report the model output before clipping so the label "Raw" is accurate.
    print("\nPrediction details:")
    print(f"Raw prediction: {raw_prediction}")

    # Clip the prediction to be between 0 and 1
    prediction = np.clip(raw_prediction, 0, 1)

    return f"Chance of Admission: {prediction:.1f}"
75
 
 
78
  fn=predict_performance,
79
  inputs=[
80
  gr.Radio(["Kathmandu", "Bhaktapur", "Lalitpur", "Kritipur"], label="Location"),
81
+ gr.Radio(["MSc IT & Applied Security", "BSc (Hons) Computing",
82
+ "BSc (Hons) Computing with Artificial Intelligence",
83
+ "BSc (Hons) Computer Networking & IT Security",
84
+ "BSc (Hons) Multimedia Technologies", "MBA",
85
+ "BA (Hons) Accounting & Finance",
86
+ "BA (Hons) Business Administration"], label="Course"),
87
  gr.Radio(["Science", "Management", "Humanities"], label="Faculty"),
88
  gr.Radio(["Event", "Facebook", "Instagram", "Offline", "Recommendation"], label="Source"),
89
+ gr.Radio(["New Year", "Dashain", "Orientation", "Fresher's Party",
90
+ "Holi Festival", "Welcome Ceremony"], label="Event"),
91
+ gr.Radio(["Ram", "Gita", "Manish", "Shyam", "Raj", "Hari", "Rina", "Shree"],
92
+ label="Presenter"),
93
  gr.Radio(["Yes", "No"], label="Visited Parent"),
94
+ gr.Radio(["Trinity", "CCRC", "KMC", "SOS", "ISMT", "St. Xavier's", "Everest", "Prime"], label="College")
95
  gr.Radio(["Yes", "No"], label="Visited College for Inquiry"),
96
+ gr.Radio(["New Year", "Dashain", "Orientation", "Fresher's Party", "Holi Festival", "Welcome Ceremony"],
97
+ label="Event"),
98
  gr.Radio(["Yes", "No"], label="Attended Any Event"),
99
+ gr.Radio(["Yes", "No"], label="Visited Parent")
 
 
100
  ],
101
  outputs="text",
102
  title="Chance of Student Admission",