Spaces:

aiEDUcurriculum
/

introtoAI-clubs-project

Running

App Files Community

aiEDUcurriculum committed 27 days ago

Commit

7273ceb

verified ·

1 Parent(s): fcd9be8

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -87

app.py CHANGED Viewed

@@ -1,7 +1,3 @@
-### ----------------------------- ###
-###           libraries           ###
-### ----------------------------- ###
 import gradio as gr
 import pandas as pd
 import numpy as np
@@ -9,121 +5,111 @@ from sklearn.model_selection import train_test_split
 from sklearn.linear_model import LogisticRegression
 from sklearn import metrics
-### ------------------------------ ###
-###       data transformation      ###
-### ------------------------------ ###
-# load dataset
 data = pd.read_csv('data.csv')
-# remove timestamp from dataset (first column)
 data = data.iloc[:, 1:]
-# create a copy for transformed data
 transformed_data = pd.DataFrame()
-# keep track of which columns are categorical and what
-# those columns' value mappings are
 cat_value_dicts = {}
-final_colname = data.columns[-1]  # club recommendation
-# for each column...
 for colname in data.columns:
     if pd.api.types.is_numeric_dtype(data[colname]):
-        transformed_data[colname] = data[colname].copy()
         continue
-    # Create mapping for categorical variables
-    unique_vals = data[colname].unique()
-    val_dict = {val: idx for idx, val in enumerate(sorted(unique_vals))}
-    # If it's the target column, store the reverse mapping
     if colname == final_colname:
-        val_dict = {idx: val for val, idx in val_dict.items()}
-    cat_value_dicts[colname] = val_dict
     transformed_data[colname] = data[colname].map(val_dict)
-### -------------------------------- ###
-###           model training         ###
-### -------------------------------- ###
-# select features and prediction
-X = transformed_data.iloc[:, :-1]  # all columns except last
-y = transformed_data.iloc[:, -1]   # last column
-# split data into training and testing sets
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
-# train the model
-model = LogisticRegression(max_iter=1000)
 model.fit(X_train, y_train)
 y_pred = model.predict(X_test)
-### -------------------------------- ###
-###        model evaluation         ###
-### -------------------------------- ###
 def get_feat():
     feats = [abs(x) for x in model.coef_[0]]
     max_val = max(feats)
     idx = feats.index(max_val)
     return data.columns[idx]
 acc = str(round(metrics.accuracy_score(y_test, y_pred) * 100, 1)) + "%"
 most_imp_feat = get_feat()
-### ------------------------------- ###
-###        predictor function      ###
-### ------------------------------- ###
 def predict(*args):
-    features = []
-    # transform categorical input using our mappings
-    for colname, arg in zip(data.columns[:-1], args):
-        if arg is None:
-            return "Please fill in all fields"
-        if colname in cat_value_dicts:
-            if arg not in cat_value_dicts[colname]:
-                return f"Invalid value for {colname}"
-            features.append(cat_value_dicts[colname][arg])
-        else:
-            try:
-                features.append(float(arg))
-            except:
-                return f"Invalid numeric value for {colname}"
-    # predict using the model
     try:
-        new_input = [features]
-        result = model.predict(new_input)
         return cat_value_dicts[final_colname][result[0]]
     except Exception as e:
         return f"Error making prediction: {str(e)}"
-### ------------------------------- ###
-###        interface creation      ###
-### ------------------------------- ###
-block = gr.Blocks()
-with block:
     gr.Markdown("# Club Recommendation System")
     gr.Markdown("Take the quiz to get a personalized club recommendation using AI.")
     with gr.Row():
-        with gr.Column(variant="panel"):  # Changed from Box to Column with panel variant
             inputls = []
-            # Create input components for each feature
-            for colname in data.columns[:-1]:  # Exclude the target column
                 if colname in cat_value_dicts:
                     choices = list(cat_value_dicts[colname].keys())
                     inputls.append(gr.Dropdown(
@@ -146,27 +132,25 @@ with block:
             gr.Markdown("<br />")
             with gr.Row():
-                with gr.Column(variant="panel"):  # Changed from Box to Column
                     gr.Markdown(f"### Model Accuracy\n{acc}")
-                with gr.Column(variant="panel"):  # Changed from Box to Column
                     gr.Markdown(f"### Most Important Feature\n{most_imp_feat}")
             gr.Markdown("<br />")
-            with gr.Column(variant="panel"):  # Changed from Box to Column
-                gr.Markdown('''⭐ Note that model accuracy is based on the uploaded data.csv and reflects how well
-                           the AI model can give correct recommendations for <em>that dataset</em>. Model accuracy
-                           and most important feature can be helpful for understanding how the model works, but
-                           <em>should not be considered absolute facts about the real world</em>.''')
-        with gr.Column(variant="panel"):  # Changed from Box to Column
             gr.Markdown("""
             # About the Club Recommendation System
             This system uses machine learning to suggest clubs based on your preferences and personality.
             Fill out the questionnaire on the left to get your personalized recommendation.
-            The system takes into account factors like:
             - Your social preferences
             - Activity preferences
             - Personal strengths

 import gradio as gr
 import pandas as pd
 import numpy as np
 from sklearn.linear_model import LogisticRegression
 from sklearn import metrics
+# Load dataset
+print("Loading data...")
 data = pd.read_csv('data.csv')
+print(f"Initial shape: {data.shape}")
+# Remove timestamp and any rows with missing values
 data = data.iloc[:, 1:]
+data = data.dropna()
+print(f"Shape after removing timestamp and NaN: {data.shape}")
+# Create transformed dataframe
 transformed_data = pd.DataFrame()
 cat_value_dicts = {}
+final_colname = data.columns[-1]
+print("\nProcessing columns:")
 for colname in data.columns:
+    print(f"\nColumn: {colname}")
+    print(f"Unique values: {data[colname].unique()}")
     if pd.api.types.is_numeric_dtype(data[colname]):
+        transformed_data[colname] = data[colname]
+        print("Numeric column - copied directly")
         continue
+    # Handle categorical variables
+    unique_vals = sorted(data[colname].dropna().unique())
+    print(f"Categorical values: {unique_vals}")
     if colname == final_colname:
+        # For target column, create both mappings
+        val_dict = {val: idx for idx, val in enumerate(unique_vals)}
+        cat_value_dicts[colname] = {idx: val for idx, val in enumerate(unique_vals)}
+    else:
+        # For feature columns, create forward mapping only
+        val_dict = {val: idx for idx, val in enumerate(unique_vals)}
+        cat_value_dicts[colname] = val_dict
     transformed_data[colname] = data[colname].map(val_dict)
+    print(f"Mapping created: {val_dict}")
+print("\nChecking for NaN values in transformed data:")
+print(transformed_data.isnull().sum())
+# Remove any remaining NaN values
+transformed_data = transformed_data.dropna()
+print(f"\nFinal transformed shape: {transformed_data.shape}")
+# Separate features and target
+X = transformed_data.iloc[:, :-1]
+y = transformed_data.iloc[:, -1]
+print(f"\nFeatures shape: {X.shape}")
+print(f"Target shape: {y.shape}")
+# Convert to numpy arrays
+X = X.to_numpy()
+y = y.to_numpy()
+# Split and train
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
+model = LogisticRegression(max_iter=2000)
 model.fit(X_train, y_train)
 y_pred = model.predict(X_test)
 def get_feat():
     feats = [abs(x) for x in model.coef_[0]]
     max_val = max(feats)
     idx = feats.index(max_val)
     return data.columns[idx]
 acc = str(round(metrics.accuracy_score(y_test, y_pred) * 100, 1)) + "%"
 most_imp_feat = get_feat()
 def predict(*args):
     try:
+        features = []
+        for colname, arg in zip(data.columns[:-1], args):
+            if arg is None or pd.isna(arg):
+                return "Please fill in all fields"
+            if colname in cat_value_dicts:
+                if arg not in cat_value_dicts[colname]:
+                    return f"Invalid value for {colname}"
+                features.append(cat_value_dicts[colname][arg])
+            else:
+                try:
+                    features.append(float(arg))
+                except:
+                    return f"Invalid numeric value for {colname}"
+        result = model.predict([features])
         return cat_value_dicts[final_colname][result[0]]
     except Exception as e:
         return f"Error making prediction: {str(e)}"
+# Create interface
+with gr.Blocks() as block:
     gr.Markdown("# Club Recommendation System")
     gr.Markdown("Take the quiz to get a personalized club recommendation using AI.")
     with gr.Row():
+        with gr.Column(variant="panel"):
             inputls = []
+            for colname in data.columns[:-1]:
                 if colname in cat_value_dicts:
                     choices = list(cat_value_dicts[colname].keys())
                     inputls.append(gr.Dropdown(
             gr.Markdown("<br />")
             with gr.Row():
+                with gr.Column(variant="panel"):
                     gr.Markdown(f"### Model Accuracy\n{acc}")
+                with gr.Column(variant="panel"):
                     gr.Markdown(f"### Most Important Feature\n{most_imp_feat}")
             gr.Markdown("<br />")
+            with gr.Column(variant="panel"):
+                gr.Markdown('''⭐ Note that model accuracy is based on the training data and reflects how well
+                           the AI model can give correct recommendations for <em>that dataset</em>.''')
+        with gr.Column(variant="panel"):
             gr.Markdown("""
             # About the Club Recommendation System
             This system uses machine learning to suggest clubs based on your preferences and personality.
             Fill out the questionnaire on the left to get your personalized recommendation.
+            The system considers:
             - Your social preferences
             - Activity preferences
             - Personal strengths