Spaces:

debjaninath
/

ml_python

Runtime error

App Files Files Community

debjaninath committed on Jun 13, 2024

Commit

81448e1

verified ·

1 Parent(s): 31008d9

Create app.py

Browse files

Files changed (1) hide show

app.py +85 -0

app.py ADDED Viewed

	@@ -0,0 +1,85 @@

+# Import the libraries
+import gradio as gr
+import pandas as pd
+import joblib
+from sklearn.preprocessing import OneHotEncoder
+import subprocess
+import json
+import uuid
+from pathlib import Path
+from huggingface_hub import CommitScheduler
+# The training script placed in the same directory as app.py trains and
+# persists a linear regression model with the filename 'model.joblib'.
+# NOTE(review): no statement visible here runs that script, so 'model.joblib' must already exist on disk - confirm.
+# Load the trained model from disk
+# model = joblib.load('/content/dt_regressor.pkl') # Uncomment this line to use Decision Tree model
+model = joblib.load('model.joblib') # Linear Regression model
+# Prepare the logging functionality: the log file lives under logs/ and gets a uuid4-based name generated when this module is loaded
+log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
+log_folder = log_file.parent
+scheduler = CommitScheduler(
+    repo_id="debjaninath/insurance-charge-mlops-logs",  # repo that receives the contents of the log folder
+    repo_type="dataset",
+    folder_path=log_folder,
+    path_in_repo="data",
+    every=2
+)
+# Define the predict function, which takes the features, converts them to a DataFrame, and makes predictions using the saved model.
+# The function runs when 'Submit' is clicked or when an API request is made.
+def predict_charges(age, bmi, children, sex, smoker, region):
+    try:
+        # Create a DataFrame from the input features
+        data = pd.DataFrame({
+            'age': [age],
+            'bmi': [bmi],
+            'children': [children],
+            'sex': [sex],
+            'smoker': [smoker],
+            'region': [region]
+        })
+        # Handle categorical variables using one-hot encoding
+        data = pd.get_dummies(data)
+        # Ensure the input data has the same features as the training data
+        train_columns = model.feature_names_in_
+        missing_columns = set(train_columns) - set(data.columns)
+        for column in missing_columns:
+            data[column] = 0
+        data = data[train_columns]
+        print("Input data:")
+        print(data)
+        # Make predictions using the loaded model
+        prediction = model.predict(data)
+        print("Prediction:", prediction)
+        # Check if prediction is not None and has at least one element
+        if prediction is not None and len(prediction) > 0:
+            # While the prediction is made, log both the inputs and outputs to a log file
+            # While writing to the log file, ensure that the commit scheduler is locked to avoid parallel
+            # access
+            with scheduler.lock:
+                with log_file.open("a") as f:
+                    f.write(json.dumps(
+                        {
+                            'age': age,
+                            'bmi': bmi,
+                            'children': children,
+                            'sex': sex,
+                            'smoker': smoker,
+                            'region': region,
+                            'prediction': prediction[0]
+                        }
+                    ))
+                    f.write("\n")
+            return float(prediction[0])