Spaces:

Hemg
/

chances_of_student_admission

Running

App Files Files Community

Hemg committed on Nov 13

Commit

e19a98b

•

1 Parent(s): b6911a4

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -73

app.py CHANGED Viewed

@@ -1,92 +1,75 @@
 import gradio as gr
 import joblib
 import numpy as np
 import pandas as pd
 from huggingface_hub import hf_hub_download
-from sklearn.preprocessing import LabelEncoder
 # Load the trained model and scaler objects from file
-REPO_ID = "Hemg/modelxxx"
-MODEL_FILENAME = "predjob.joblib"
-SCALER_FILENAME = "scalejob.joblib"
 model = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME))
 scaler = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME))
 def encode_categorical_columns(df):
     """Build a float-valued copy of ``df`` for the model.

     The Yes/No columns become 1.0/0.0, the remaining text columns are
     label-encoded, and the numeric columns are cast to float.  The frame
     passed in is left untouched; the encoded copy is returned.
     """
     yes_no_codes = {'Yes': 1, 'No': 0}
     encoded = df.copy()

     # Binary answers: any value other than 'Yes'/'No' maps to NaN.
     for name in ('Visited Parent', 'Visited College for Inquiry', 'Attended Any Event', 'College'):
         encoded[name] = encoded[name].map(yes_no_codes).astype(float)

     # NOTE(review): the encoder is re-fit on whatever values are in this
     # frame, so the codes depend on the rows passed in -- confirm this
     # matches the encoding used when the model was trained.
     encoder = LabelEncoder()
     for name in ('Location', 'Course', 'Faculty', 'Source', 'Event', 'Presenter'):
         encoded[name] = encoder.fit_transform(encoded[name]).astype(float)

     # Numeric inputs only need a dtype cast.
     for name in ('College Fee', 'GPA', 'Year'):
         encoded[name] = encoded[name].astype(float)

     return encoded
def predict_performance(Location, Course, College, Faculty, Source, Event, Presenter, Visited_Parent, Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year):
    """Return the formatted admission estimate for one set of form answers.

    The answers are placed in a one-row DataFrame, encoded with
    ``encode_categorical_columns``, min-max normalised with fixed bounds,
    and passed to the module-level ``model``.  The first prediction is
    clipped to [0, 1] and returned as ``"Chance of Admission: <x.x>"``.
    Intermediate frames and the prediction are printed for debugging.
    """
    answers = {
        'Location': Location,
        'Course': Course,
        'College': College,
        'Faculty': Faculty,
        'Source': Source,
        'Event': Event,
        'Presenter': Presenter,
        'Visited Parent': Visited_Parent,
        'Visited College for Inquiry': Visited_College_for_Inquiry,
        'Attended Any Event': Attended_Any_Event,
        'College Fee': float(College_Fee),
        'GPA': float(GPA),
        'Year': float(Year),
    }
    input_df = pd.DataFrame({column: [value] for column, value in answers.items()})
    print("\nInput DataFrame:")
    print(input_df)

    encoded_df = encode_categorical_columns(input_df)
    print("\nEncoded DataFrame:")
    print(encoded_df)

    # Min-max scaling with a fixed (low, high) pair per numeric column.
    for column, low, high in (
        ('College Fee', 1000000, 1700000),
        ('Year', 2019, 2025),
        ('GPA', 2.0, 3.0),
    ):
        encoded_df[column] = (encoded_df[column] - low) / (high - low)
    print("\nNormalized DataFrame:")
    print(encoded_df)

    features = encoded_df.astype(float)
    # Keep the reported value inside the [0, 1] range.
    prediction = np.clip(model.predict(features)[0], 0, 1)
    print("\nPrediction:", prediction)
    return f"Chance of Admission: {prediction:.1f}"
@@ -95,21 +78,25 @@ iface = gr.Interface(
     fn=predict_performance,
     inputs=[
         gr.Radio(["Kathmandu", "Bhaktapur", "Lalitpur", "Kritipur"], label="Location"),
-        gr.Radio(["MSc IT & Applied Security", "BSc (Hons) Computing", "BSc (Hons) Computing with Artificial Intelligence",
-                 "BSc (Hons) Computer Networking & IT Security", "BSc (Hons) Multimedia Technologies", "MBA",
-                 "BA (Hons) Accounting & Finance", "BA (Hons) Business Administration"], label="Course"),
-        gr.Radio(["Yes", "No"], label="College"),
         gr.Radio(["Science", "Management", "Humanities"], label="Faculty"),
         gr.Radio(["Event", "Facebook", "Instagram", "Offline", "Recommendation"], label="Source"),
-        gr.Radio(["New Year", "Dashain", "Orientation", "Fresher's Party", "Holi Festival", "Welcome Ceremony"],
-                 label="Event"),
-        gr.Radio(["Ram", "Gita", "Manish", "Shyam", "Raj", "Hari", "Rina", "Shree"], label="Presenter"),
         gr.Radio(["Yes", "No"], label="Visited Parent"),
         gr.Radio(["Yes", "No"], label="Visited College for Inquiry"),
         gr.Radio(["Yes", "No"], label="Attended Any Event"),
-        gr.Slider(minimum=1000000, maximum=1700000, step=1000, label="College Fee"),
-        gr.Slider(minimum=2.0, maximum=3.0, step=0.1, label="GPA"),
-        gr.Slider(minimum=2019, maximum=2025, step=1, label="Year")
     ],
     outputs="text",
     title="Chance of Student Admission",

 import gradio as gr
 import joblib
 import numpy as np
 import pandas as pd
 from huggingface_hub import hf_hub_download
+from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
 # Load the trained model and scaler objects from file
+REPO_ID = "Hemg/modelxxx"  # hugging face repo ID
+MODEL_FILENAME = "predjob.joblib"  # model file name
+SCALER_FILENAME = "scalejob.joblib"  # scaler file name
 model = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME))
 scaler = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME))
 def encode_categorical_columns(df):
     """Return a copy of ``df`` with every object-dtype column label-encoded.

     Each object column is replaced by the integer codes produced by a
     ``LabelEncoder`` fitted on that column; all other columns are passed
     through unchanged.  The caller's frame is not modified.

     No one-hot step is applied: every object column is label-encoded, so
     no object columns remain afterwards to one-hot encode.

     NOTE(review): the encoder is fitted on the values present in ``df``
     itself, so the codes depend on the rows passed in (a single-row frame
     encodes every object column as 0).  Confirm this matches the encoding
     used when the model and scaler were trained.
     """
     # Work on a copy so the caller's DataFrame keeps its original values.
     encoded = df.copy()
     for col in encoded.select_dtypes(include=['object']).columns:
         # A fresh encoder per column avoids reusing fitted state.
         encoded[col] = LabelEncoder().fit_transform(encoded[col])
     return encoded
def predict_performance(Location, Course, Faculty, College, Source, Event, Presenter, Visited_Parent,
                        Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year):
    """Return the formatted admission estimate for one set of form answers.

    The thirteen answers are placed in a one-row DataFrame whose column
    names use spaces instead of underscores, encoded with
    ``encode_categorical_columns``, scaled with the module-level ``scaler``
    and passed to the module-level ``model``.  The first prediction is
    clipped to [0, 1].

    Returns:
        A string of the form ``"Chance of Admission: <x.x>"``.

    NOTE(review): the column order below is assumed to match the order the
    scaler and model were fitted with -- confirm against the training data.
    """
    # A dict keeps each value beside its column name, so the row cannot
    # fall out of step with the column list (every column, including
    # College, must be present for the frame to be built).
    row = {
        "Location": Location,
        "Course": Course,
        "Faculty": Faculty,
        "College": College,
        "Source": Source,
        "Event": Event,
        "Presenter": Presenter,
        "Visited Parent": Visited_Parent,
        "Visited College for Inquiry": Visited_College_for_Inquiry,
        "Attended Any Event": Attended_Any_Event,
        "College Fee": College_Fee,
        "GPA": GPA,
        "Year": Year,
    }
    input_df = pd.DataFrame([row], columns=list(row))
    # Debug output: frame before encoding.
    print("\nDataFrame before encoding:")
    print(input_df)
    # Encode categorical columns.
    df = encode_categorical_columns(input_df)
    # Debug output: frame after encoding.
    print("\nDataFrame after encoding:")
    print(df)
    # Scale input data using the loaded scaler.
    scaled_input = scaler.transform(df)
    # Keep the unclipped model output so the debug line below is truthful.
    raw_prediction = model.predict(scaled_input)[0]
    # Clip the prediction to be between 0 and 1.
    prediction = np.clip(raw_prediction, 0, 1)
    print("\nPrediction details:")
    print(f"Raw prediction: {raw_prediction}")
    return f"Chance of Admission: {prediction:.1f}"
     fn=predict_performance,
     inputs=[
         gr.Radio(["Kathmandu", "Bhaktapur", "Lalitpur", "Kritipur"], label="Location"),
+        gr.Radio(["MSc IT & Applied Security", "BSc (Hons) Computing",
+                 "BSc (Hons) Computing with Artificial Intelligence",
+                 "BSc (Hons) Computer Networking & IT Security",
+                 "BSc (Hons) Multimedia Technologies", "MBA",
+                 "BA (Hons) Accounting & Finance",
+                 "BA (Hons) Business Administration"], label="Course"),
         gr.Radio(["Science", "Management", "Humanities"], label="Faculty"),
         gr.Radio(["Event", "Facebook", "Instagram", "Offline", "Recommendation"], label="Source"),
+        gr.Radio(["New Year", "Dashain", "Orientation", "Fresher's Party",
+                 "Holi Festival", "Welcome Ceremony"], label="Event"),
+        gr.Radio(["Ram", "Gita", "Manish", "Shyam", "Raj", "Hari", "Rina", "Shree"],
+                label="Presenter"),
         gr.Radio(["Yes", "No"], label="Visited Parent"),
+        gr.Radio(["Trinity", "CCRC", "KMC", "SOS", "ISMT", "St. Xavier's", "Everest", "Prime"], label="College")
         gr.Radio(["Yes", "No"], label="Visited College for Inquiry"),
+        gr.Radio(["New Year", "Dashain", "Orientation", "Fresher's Party", "Holi Festival", "Welcome Ceremony"],
+                 label="Event"),
         gr.Radio(["Yes", "No"], label="Attended Any Event"),
+        gr.Radio(["Yes", "No"], label="Visited Parent")
     ],
     outputs="text",
     title="Chance of Student Admission",