transexpress_ml_api

Running

App Files Files Community

Arafath10 committed on Apr 22

Commit

5cb1a46

•

1 Parent(s): aee20ee

Update main.py

Browse files

Files changed (1) hide show

main.py +188 -23

main.py CHANGED Viewed

@@ -1,22 +1,21 @@
-from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel
 import pandas as pd
 import numpy as np
-import joblib
-# Load your trained model and encoders
-xgb_model = joblib.load("model/transexpress_xgb_model.joblib")
-encoders = joblib.load("model/transexpress_encoders.joblib")
-# Function to handle unseen labels during encoding
-def safe_transform(encoder, column):
-    classes = encoder.classes_
-    return [encoder.transform([x])[0] if x in classes else -1 for x in column]
-# Define FastAPI app
 app = FastAPI()
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -25,31 +24,198 @@ app.add_middleware(
     allow_headers=["*"],
 )
 # Endpoint for making predictions
 @app.post("/predict")
 def predict(
     customer_name: str,
     customer_address: str,
     customer_phone: str,
-    customer_email: str,
     weight: int,
     cod: int,
     pickup_address: str,
-    destination_city_name: str):
-    # Convert input data to DataFrame
-    if not destination_city_name.strip():
-        destination_city_name = 'Missing'
-    print(destination_city_name)
     input_data = {
         'customer_name': customer_name,
         'customer_address': customer_address,
         'customer_phone_no': customer_phone,
-        'client_email': customer_email,
         'weight': weight,
         'cod': cod,
         'pickup_address':pickup_address,
-        'destination_branch_name':destination_city_name
     }
     input_df = pd.DataFrame([input_data])
@@ -65,9 +231,8 @@ def predict(
     # Output
     predicted_status = "Unknown" if pred[0] == -1 else encoders['status_name'].inverse_transform([pred])[0]
     probability = pred_proba[0][pred[0]] * 100 if pred[0] != -1 else "Unknown"
-    print(input_data,predicted_status,probability)
     if predicted_status == "Returned to Client":
        probability = 100 - probability
-    return {"Probability": round(probability,2)}

+import asyncio
+from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
+import requests
 import pandas as pd
+import json
+import httpx,os,datetime
+import pandas as pd
+from sklearn.model_selection import train_test_split, GridSearchCV
+from sklearn.preprocessing import LabelEncoder
+from xgboost import XGBClassifier
+from sklearn.metrics import accuracy_score, classification_report
+from joblib import dump, load
 import numpy as np
 app = FastAPI()
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_headers=["*"],
 )
+# Columns read from each order record; 'status_name' is the prediction target.
+_SELECTED_COLUMNS = ['customer_name', 'customer_address', 'customer_phone_no',
+                     'weight','cod','pickup_address','client_number','destination_city',
+                     'status_name']
+_MODEL_FILENAME = 'transexpress_xgb_model.joblib'
+_ENCODERS_FILENAME = 'transexpress_encoders.joblib'
+def _finetune_existing_model(data):
+    """Refit the saved model on ``data`` using the saved encoders; raises if either file cannot be loaded."""
+    encoders = load(_ENCODERS_FILENAME)
+    xgb_model = load(_MODEL_FILENAME)
+    new_data_filled = data[_SELECTED_COLUMNS].fillna('Missing')
+    for col, encoder in encoders.items():
+        if col not in new_data_filled.columns:
+            continue
+        unseen_categories = set(new_data_filled[col]) - set(encoder.classes_)
+        if unseen_categories:
+            # Extend the known labels in one call, in a stable order, so the
+            # integer codes do not depend on set iteration order.
+            encoder.classes_ = np.append(encoder.classes_, sorted(unseen_categories, key=str))
+        new_data_filled[col] = encoder.transform(new_data_filled[col])
+    # NOTE(review): the extended encoders are only changed in memory here; the
+    # encoders file is written by the base-training path alone, so /predict keeps
+    # loading the original label set. Persisting them here would also change the
+    # file mtime that /get_latest_model_updated_time reports as the base model
+    # creation time - confirm the intended behaviour before changing this.
+    X_new = new_data_filled.drop('status_name', axis=1)
+    y_new = new_data_filled['status_name']
+    # Only the test part of the split is used (for the metrics below).
+    _, X_test, _, y_test = train_test_split(X_new,y_new, test_size=0.2, random_state=42)
+    # NOTE(review): fit() is called without ``xgb_model=``; per the XGBoost docs
+    # that re-fits from scratch instead of continuing training - confirm intent.
+    xgb_model.fit(X_new, y_new)
+    dump(xgb_model, _MODEL_FILENAME)
+    # NOTE(review): X_test is a subset of the rows the model was just fitted on,
+    # so the reported accuracy is optimistic.
+    y_pred = xgb_model.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+    classification_rep = classification_report(y_test, y_pred)
+    return accuracy,classification_rep,"Model finetuned with new data."
+def _train_base_model(data):
+    """Grid-search and train a fresh XGBoost classifier on ``data``, then save it together with its encoders."""
+    # Handling missing values
+    data_filled = data[_SELECTED_COLUMNS].fillna('Missing')
+    # Encoding categorical variables (object-dtype columns only)
+    encoders = {col: LabelEncoder() for col in _SELECTED_COLUMNS if data_filled[col].dtype == 'object'}
+    for col, encoder in encoders.items():
+        data_filled[col] = encoder.fit_transform(data_filled[col])
+    # Splitting the dataset
+    X = data_filled.drop('status_name', axis=1)
+    y = data_filled['status_name']
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+    # Setup the hyperparameter grid to search
+    param_grid = {
+        'max_depth': [3, 4, 5],
+        'learning_rate': [0.01, 0.1, 0.4],
+        'n_estimators': [100, 200, 300],
+        'subsample': [0.8, 0.9, 1],
+        'colsample_bytree': [0.3, 0.7]
+    }
+    # Initialize the classifier and search the grid on the training split
+    xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
+    grid_search = GridSearchCV(xgb, param_grid, cv=2, n_jobs=-1, scoring='accuracy')
+    grid_search.fit(X_train, y_train)
+    best_params = grid_search.best_params_
+    print("Best parameters:", best_params)
+    # Train the model with best parameters
+    best_xgb = XGBClassifier(**best_params, use_label_encoder=False, eval_metric='logloss')
+    best_xgb.fit(X_train, y_train)
+    # Evaluate on the held-out split
+    y_pred = best_xgb.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+    classification_rep = classification_report(y_test, y_pred)
+    # Save the model and the encoders it was trained with
+    dump(best_xgb, _MODEL_FILENAME)
+    dump(encoders, _ENCODERS_FILENAME)
+    return accuracy,classification_rep,"base Model trained"
+def train_the_model(data):
+    """Train on ``data``, refitting the saved model when one exists.
+    The saved model and encoders are loaded and refitted on ``data``. If that
+    fails for any reason (for example the files do not exist yet), a base model
+    is trained from scratch with a grid search and saved with fresh encoders.
+    Args:
+        data: DataFrame that contains every column in ``_SELECTED_COLUMNS``.
+    Returns:
+        Tuple ``(accuracy, classification_report_text, status_message)``.
+    """
+    try:
+        return _finetune_existing_model(data)
+    except Exception as exc:
+        # Best-effort fallback kept from the original, but no longer a bare
+        # ``except``: KeyboardInterrupt/SystemExit now propagate and the reason
+        # for falling back is visible in the logs.
+        print("fine-tuning not possible, training base model instead:", repr(exc))
+        return _train_base_model(data)
+@app.get("/trigger_the_data_fecher")
+async def your_continuous_function(page: str,paginate: str):
+    """Fetch one page of orders from the report API and train the model on it.
+    Args:
+        page: page number to request from the report API.
+        paginate: number of records per page.
+    Returns:
+        Dict with the training status message, the page number, the number of
+        records fetched, and the accuracy / classification report returned by
+        ``train_the_model`` (both ``None`` when the page holds no records).
+    """
+    print("data fetcher running.....")
+    url = "https://report.transexpress.lk/api/orders/delivery-success-rate/return-to-client-orders"
+    # SECURITY(review): a session cookie is hard-coded in source; it should be
+    # moved to configuration and rotated.
+    headers = {
+      'Cookie': 'development_trans_express_session=NaFDGzh5WQCFwiortxA6WEFuBjsAG9GHIQrbKZ8B'
+    }
+    def fetch_orders():
+        # page/per_page are sent as query parameters. The previous URL was a
+        # plain string containing "{page}" and "{paginate}" (no f-string or
+        # .format), so the placeholders were never substituted.
+        response = requests.get(url, headers=headers,
+                                params={"page": page, "per_page": paginate},
+                                timeout=60)
+        # Fail with a clear HTTP error instead of a JSON/KeyError further down
+        response.raise_for_status()
+        return response.json()
+    # requests and model training are blocking; run them in the default executor
+    # so the event loop is not stalled while they execute.
+    loop = asyncio.get_running_loop()
+    json_response = await loop.run_in_executor(None, fetch_orders)
+    # Extracting 'data' for conversion
+    data = json_response["return_to_client_orders"]['data']
+    data_count = len(data)
+    print("data collected from page : "+page)
+    if not data:
+        # An empty page yields a frame without 'status_name'; nothing to train on
+        return {"message":"no data found for this page","page_number":page,"data_count":data_count,"accuracy":None,"classification_rep":None}
+    df = pd.json_normalize(data)
+    # Collapse the status variants into the two classes the model predicts
+    df['status_name'] = df['status_name'].replace({'Partially Delivered': 'Delivered',
+                                                   'Received by Client': 'Returned to Client'})
+    accuracy,classification_rep,message = await loop.run_in_executor(None, train_the_model, df)
+    return {"message":message,"page_number":page,"data_count":data_count,"accuracy":accuracy,"classification_rep":classification_rep}
+@app.get("/get_latest_model_updated_time")
+async def model_updated_time():
+    """Report the modification times of the saved encoder and model files.
+    Returns:
+        Dict with the encoder file's mtime (reported as the base model creation
+        time) and the model file's mtime (reported as the last update time), or
+        a one-element set holding a hint message when either file cannot be read.
+    """
+    try:
+        m_time_encoder = os.path.getmtime('transexpress_encoders.joblib')
+        m_time_model = os.path.getmtime('transexpress_xgb_model.joblib')
+    except OSError:
+        # getmtime raises OSError (e.g. FileNotFoundError) when a file is missing
+        # or inaccessible; other exceptions are no longer swallowed.
+        # NOTE(review): this is a set literal, not a dict - kept unchanged so
+        # the response shape stays the same for existing clients.
+        return {"no model found so first trained the model using data fecther"}
+    return {"base model created time ":datetime.datetime.fromtimestamp(m_time_encoder),
+            "last model updated time":datetime.datetime.fromtimestamp(m_time_model)}
 # Endpoint for making predictions
 @app.post("/predict")
 def predict(
     customer_name: str,
     customer_address: str,
     customer_phone: str,
     weight: int,
     cod: int,
     pickup_address: str,
+    client_number:str,
+    destination_city:str
+    ):
+    try:
+        # Load your trained model and encoders
+        xgb_model = load('transexpress_xgb_model.joblib')
+        encoders = load('transexpress_encoders.joblib')
+    except:
+        return {"no model found so first trained the model using data fecther"}
+    # Function to handle unseen labels during encoding
+    def safe_transform(encoder, column):
+        classes = encoder.classes_
+        return [encoder.transform([x])[0] if x in classes else -1 for x in column]
+    # Convert input data to DataFrame
     input_data = {
         'customer_name': customer_name,
         'customer_address': customer_address,
         'customer_phone_no': customer_phone,
         'weight': weight,
         'cod': cod,
         'pickup_address':pickup_address,
+        'client_number':client_number,
+        'destination_city':destination_city
     }
     input_df = pd.DataFrame([input_data])
     # Output
     predicted_status = "Unknown" if pred[0] == -1 else encoders['status_name'].inverse_transform([pred])[0]
     probability = pred_proba[0][pred[0]] * 100 if pred[0] != -1 else "Unknown"
     if predicted_status == "Returned to Client":
        probability = 100 - probability
+    return {"Probability": round(probability,2)}