Hemg committed on
Commit
81fa7c6
1 Parent(s): 885941f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -25
app.py CHANGED
@@ -14,36 +14,56 @@ model = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME))
14
  scaler = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME))
15
 
16
  def encode_categorical_columns(df):
17
- label_encoder = LabelEncoder()
 
18
 
19
- # Identify categorical columns
20
- ordinal_columns = df.select_dtypes(include=['object']).columns
 
 
 
 
21
 
22
- # Encode ordinal columns using LabelEncoder
23
- for col in ordinal_columns:
24
- df[col] = label_encoder.fit_transform(df[col])
25
-
26
- # Get nominal columns for one-hot encoding
27
- nominal_columns = df.select_dtypes(include=['object']).columns.difference(ordinal_columns)
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- # Apply one-hot encoding to nominal columns (drop the first column to avoid multicollinearity)
30
- df = pd.get_dummies(df, columns=nominal_columns, drop_first=True)
31
-
32
  return df
 
33
  def predict_performance(Location, Course, Faculty, College, Source, Event, Presenter, Visited_Parent,
34
  Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year):
35
- input_data = [Location, Course, Faculty, College, Source, Event, Presenter, Visited_Parent,
36
- Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year]
37
-
38
- # Updated feature names to use spaces instead of underscores to match training data
39
- feature_names = [
40
- "Location", "Course", "Faculty", "College","Source", "Event", "Presenter",
41
- "Visited Parent", "Visited College for Inquiry", "Attended Any Event",
42
- "College Fee", "GPA", "Year"
43
- ]
 
 
 
 
 
 
 
44
 
45
- input_df = pd.DataFrame([input_data], columns=feature_names)
46
-
47
 
48
  # Debug print 2: Show DataFrame before encoding
49
  print("\nDataFrame before encoding:")
@@ -65,11 +85,9 @@ def predict_performance(Location, Course, Faculty, College, Source, Event, Prese
65
  # Clip the prediction to be between 0 and 1
66
  prediction = np.clip(prediction, 0, 1)
67
 
68
-
69
  # Debug print
70
  print("\nPrediction details:")
71
  print(f"Raw prediction: {prediction}")
72
-
73
 
74
  return f"Chance of Admission: {prediction:.1f}"
75
 
 
14
  scaler = joblib.load(hf_hub_download(repo_id=REPO_ID, filename=SCALER_FILENAME))
15
 
16
  def encode_categorical_columns(df):
17
+ # Create a copy of the DataFrame to avoid modifying the original
18
+ df = df.copy()
19
 
20
+ # Define the expected categorical columns and their order
21
+ categorical_columns = [
22
+ "Location", "Course", "Faculty", "College", "Source", "Event",
23
+ "Presenter", "Visited Parent", "Visited College for Inquiry",
24
+ "Attended Any Event"
25
+ ]
26
 
27
+ # Define the expected numeric columns
28
+ numeric_columns = ["College Fee", "GPA", "Year"]
29
+
30
+ # Create label encoder dictionary
31
+ label_encoders = {}
32
+
33
+ # Encode each categorical column
34
+ for col in categorical_columns:
35
+ label_encoders[col] = LabelEncoder()
36
+ df[col] = label_encoders[col].fit_transform(df[col].astype(str))
37
+
38
+ # Ensure numeric columns are float type
39
+ for col in numeric_columns:
40
+ df[col] = df[col].astype(float)
41
+
42
+ # Ensure columns are in the correct order
43
+ df = df[categorical_columns + numeric_columns]
44
 
 
 
 
45
  return df
46
+
47
  def predict_performance(Location, Course, Faculty, College, Source, Event, Presenter, Visited_Parent,
48
  Visited_College_for_Inquiry, Attended_Any_Event, College_Fee, GPA, Year):
49
+ # Create input DataFrame with consistent column names
50
+ input_data = {
51
+ "Location": [Location],
52
+ "Course": [Course],
53
+ "Faculty": [Faculty],
54
+ "College": [College],
55
+ "Source": [Source],
56
+ "Event": [Event],
57
+ "Presenter": [Presenter],
58
+ "Visited Parent": [Visited_Parent],
59
+ "Visited College for Inquiry": [Visited_College_for_Inquiry],
60
+ "Attended Any Event": [Attended_Any_Event],
61
+ "College Fee": [float(College_Fee)],
62
+ "GPA": [float(GPA)],
63
+ "Year": [float(Year)]
64
+ }
65
 
66
+ input_df = pd.DataFrame(input_data)
 
67
 
68
  # Debug print 2: Show DataFrame before encoding
69
  print("\nDataFrame before encoding:")
 
85
  # Clip the prediction to be between 0 and 1
86
  prediction = np.clip(prediction, 0, 1)
87
 
 
88
  # Debug print
89
  print("\nPrediction details:")
90
  print(f"Raw prediction: {prediction}")
 
91
 
92
  return f"Chance of Admission: {prediction:.1f}"
93