Spaces:

Hemg
/

chances_of_student_admission

Running

App Files Files Community

Hemg committed on Nov 13

Commit

81fa7c6

•

1 Parent(s): 885941f

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -25

app.py CHANGED Viewed

@@ -14,36 +14,56 @@ model = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME))
 scaler = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME))
 def encode_categorical_columns(df):
-    label_encoder = LabelEncoder()
-    # Identify categorical columns
-    ordinal_columns = df.select_dtypes(include=['object']).columns
-    # Encode ordinal columns using LabelEncoder
-    for col in ordinal_columns:
-        df[col] = label_encoder.fit_transform(df[col])
-    # Get nominal columns for one-hot encoding
-    nominal_columns = df.select_dtypes(include=['object']).columns.difference(ordinal_columns)
-    # Apply one-hot encoding to nominal columns (drop the first column to avoid multicollinearity)
-    df = pd.get_dummies(df, columns=nominal_columns, drop_first=True)
     return df
 def predict_performance(Location, Course, Faculty, College, Source, Event, Presenter, Visited_Parent,
                        Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year):
-    input_data = [Location, Course, Faculty, College, Source, Event, Presenter, Visited_Parent,
-                  Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year]
-    # Updated feature names to use spaces instead of underscores to match training data
-    feature_names = [
-        "Location", "Course", "Faculty", "College","Source", "Event", "Presenter",
-        "Visited Parent", "Visited College for Inquiry", "Attended Any Event",
-        "College Fee", "GPA", "Year"
-    ]
-    input_df = pd.DataFrame([input_data], columns=feature_names)
     # Debug print 2: Show DataFrame before encoding
     print("\nDataFrame before encoding:")
@@ -65,11 +85,9 @@ def predict_performance(Location, Course, Faculty, College, Source, Event, Prese
     # Clip the prediction to be between 0 and 1
     prediction = np.clip(prediction, 0, 1)
     # Debug print
     print("\nPrediction details:")
     print(f"Raw prediction: {prediction}")
     return f"Chance of Admission: {prediction:.1f}"

 scaler = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME))
 def encode_categorical_columns(df):
+    # Create a copy of the DataFrame to avoid modifying the original
+    df = df.copy()
+    # Define the expected categorical columns and their order
+    categorical_columns = [
+        "Location", "Course", "Faculty", "College", "Source", "Event",
+        "Presenter", "Visited Parent", "Visited College for Inquiry",
+        "Attended Any Event"
+    ]
+    # Define the expected numeric columns
+    numeric_columns = ["College Fee", "GPA", "Year"]
+    # Create label encoder dictionary
+    label_encoders = {}
+    # Encode each categorical column
+    for col in categorical_columns:
+        label_encoders[col] = LabelEncoder()
+        df[col] = label_encoders[col].fit_transform(df[col].astype(str))
+    # Ensure numeric columns are float type
+    for col in numeric_columns:
+        df[col] = df[col].astype(float)
+    # Ensure columns are in the correct order
+    df = df[categorical_columns + numeric_columns]
     return df
 def predict_performance(Location, Course, Faculty, College, Source, Event, Presenter, Visited_Parent,
                        Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year):
+    # Create input DataFrame with consistent column names
+    input_data = {
+        "Location": [Location],
+        "Course": [Course],
+        "Faculty": [Faculty],
+        "College": [College],
+        "Source": [Source],
+        "Event": [Event],
+        "Presenter": [Presenter],
+        "Visited Parent": [Visited_Parent],
+        "Visited College for Inquiry": [Visited_College_for_Inquiry],
+        "Attended Any Event": [Attended_Any_Event],
+        "College Fee": [float(College_Fee)],
+        "GPA": [float(GPA)],
+        "Year": [float(Year)]
+    }
+    input_df = pd.DataFrame(input_data)
     # Debug print 2: Show DataFrame before encoding
     print("\nDataFrame before encoding:")
     # Clip the prediction to be between 0 and 1
     prediction = np.clip(prediction, 0, 1)
     # Debug print
     print("\nPrediction details:")
     print(f"Raw prediction: {prediction}")
     return f"Chance of Admission: {prediction:.1f}"