Arafath10 committed on
Commit
5cb1a46
1 Parent(s): aee20ee

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +188 -23
main.py CHANGED
@@ -1,22 +1,21 @@
1
- from fastapi import FastAPI, HTTPException
 
2
  from fastapi.middleware.cors import CORSMiddleware
3
- from pydantic import BaseModel
4
  import pandas as pd
 
 
 
 
 
 
 
 
5
  import numpy as np
6
- import joblib
7
-
8
-
9
- # Load your trained model and encoders
10
- xgb_model = joblib.load("model/transexpress_xgb_model.joblib")
11
- encoders = joblib.load("model/transexpress_encoders.joblib")
12
 
13
- # Function to handle unseen labels during encoding
14
- def safe_transform(encoder, column):
15
- classes = encoder.classes_
16
- return [encoder.transform([x])[0] if x in classes else -1 for x in column]
17
 
18
- # Define FastAPI app
19
  app = FastAPI()
 
20
  app.add_middleware(
21
  CORSMiddleware,
22
  allow_origins=["*"],
@@ -25,31 +24,198 @@ app.add_middleware(
25
  allow_headers=["*"],
26
  )
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  # Endpoint for making predictions
29
  @app.post("/predict")
30
  def predict(
31
  customer_name: str,
32
  customer_address: str,
33
  customer_phone: str,
34
- customer_email: str,
35
  weight: int,
36
  cod: int,
37
  pickup_address: str,
38
- destination_city_name: str):
39
- # Convert input data to DataFrame
 
40
 
41
- if not destination_city_name.strip():
42
- destination_city_name = 'Missing'
43
- print(destination_city_name)
 
 
 
 
 
 
 
 
 
 
 
 
44
  input_data = {
45
  'customer_name': customer_name,
46
  'customer_address': customer_address,
47
  'customer_phone_no': customer_phone,
48
- 'client_email': customer_email,
49
  'weight': weight,
50
  'cod': cod,
51
  'pickup_address':pickup_address,
52
- 'destination_branch_name':destination_city_name
 
53
  }
54
  input_df = pd.DataFrame([input_data])
55
 
@@ -65,9 +231,8 @@ def predict(
65
  # Output
66
  predicted_status = "Unknown" if pred[0] == -1 else encoders['status_name'].inverse_transform([pred])[0]
67
  probability = pred_proba[0][pred[0]] * 100 if pred[0] != -1 else "Unknown"
68
- print(input_data,predicted_status,probability)
69
 
70
  if predicted_status == "Returned to Client":
71
  probability = 100 - probability
72
 
73
- return {"Probability": round(probability,2)}
 
1
+ import asyncio
2
+ from fastapi import FastAPI
3
  from fastapi.middleware.cors import CORSMiddleware
4
+ import requests
5
  import pandas as pd
6
+ import json
7
+ import httpx,os,datetime
8
+ import pandas as pd
9
+ from sklearn.model_selection import train_test_split, GridSearchCV
10
+ from sklearn.preprocessing import LabelEncoder
11
+ from xgboost import XGBClassifier
12
+ from sklearn.metrics import accuracy_score, classification_report
13
+ from joblib import dump, load
14
  import numpy as np
 
 
 
 
 
 
15
 
 
 
 
 
16
 
 
17
  app = FastAPI()
18
+
19
  app.add_middleware(
20
  CORSMiddleware,
21
  allow_origins=["*"],
 
24
  allow_headers=["*"],
25
  )
26
 
27
+
28
+
29
# Feature columns plus the 'status_name' target, shared by both training paths.
_SELECTED_COLUMNS = ['customer_name', 'customer_address', 'customer_phone_no',
                     'weight', 'cod', 'pickup_address', 'client_number',
                     'destination_city', 'status_name']
_MODEL_FILENAME = 'transexpress_xgb_model.joblib'
_ENCODERS_FILENAME = 'transexpress_encoders.joblib'


def _finetune_existing_model(data):
    """Refit the saved model on *data* using the saved label encoders.

    Raises whatever ``load`` raises when either joblib file is missing, and
    any error from encoding or fitting; the caller treats every failure as
    "fall back to base training".
    """
    encoders = load(_ENCODERS_FILENAME)
    xgb_model = load(_MODEL_FILENAME)

    new_data_filled = data[_SELECTED_COLUMNS].fillna('Missing')
    for col, encoder in encoders.items():
        if col not in new_data_filled.columns:
            continue
        unseen_categories = set(new_data_filled[col]) - set(encoder.classes_)
        # Append unseen labels in a stable order so the integer code given to
        # a new label does not depend on set iteration order.
        for category in sorted(unseen_categories, key=str):
            encoder.classes_ = np.append(encoder.classes_, category)
        new_data_filled[col] = encoder.transform(new_data_filled[col])
    # NOTE(review): the extended encoders are not written back to disk, so the
    # codes assigned to unseen labels here are unknown to later callers that
    # load the encoders file - confirm whether they should be persisted.

    X_new = new_data_filled.drop('status_name', axis=1)
    y_new = new_data_filled['status_name']
    X_train, X_test, y_train, y_test = train_test_split(
        X_new, y_new, test_size=0.2, random_state=42)

    # Fit on the training split only, so the metrics below are measured on
    # rows the model has not seen (fitting on X_new leaked the test rows).
    # NOTE(review): fit() is called without ``xgb_model=``; per the XGBoost
    # docs that argument is what enables training continuation - confirm
    # whether a fresh refit on the new page is really what is wanted.
    xgb_model.fit(X_train, y_train)
    dump(xgb_model, _MODEL_FILENAME)

    y_pred = xgb_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    classification_rep = classification_report(y_test, y_pred)
    return accuracy, classification_rep, "Model finetuned with new data."


def _train_base_model(data):
    """Train a model from scratch on *data* and save model and encoders."""
    # Handling missing values
    data_filled = data[_SELECTED_COLUMNS].fillna('Missing')

    # Encoding categorical variables: one LabelEncoder per object-dtype column
    encoders = {col: LabelEncoder() for col in _SELECTED_COLUMNS
                if data_filled[col].dtype == 'object'}
    for col, encoder in encoders.items():
        data_filled[col] = encoder.fit_transform(data_filled[col])

    # Splitting the dataset
    X = data_filled.drop('status_name', axis=1)
    y = data_filled['status_name']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    # Hyperparameter grid searched with 2-fold CV, scored by accuracy
    param_grid = {
        'max_depth': [3, 4, 5],
        'learning_rate': [0.01, 0.1, 0.4],
        'n_estimators': [100, 200, 300],
        'subsample': [0.8, 0.9, 1],
        'colsample_bytree': [0.3, 0.7],
    }
    xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
    grid_search = GridSearchCV(xgb, param_grid, cv=2, n_jobs=-1, scoring='accuracy')
    grid_search.fit(X_train, y_train)

    best_params = grid_search.best_params_
    print("Best parameters:", best_params)

    # Train the final model with the best parameters
    best_xgb = XGBClassifier(**best_params, use_label_encoder=False, eval_metric='logloss')
    best_xgb.fit(X_train, y_train)

    # Evaluate on the held-out split
    y_pred = best_xgb.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    classification_rep = classification_report(y_test, y_pred)

    # Persist the model and the encoders fitted above
    dump(best_xgb, _MODEL_FILENAME)
    dump(encoders, _ENCODERS_FILENAME)

    return accuracy, classification_rep, "base Model trained"


def train_the_model(data):
    """Update the delivery-status model with *data*, or create it if absent.

    First tries to load the saved model and encoders and refit on *data*.
    If that fails for any reason (for example the joblib files do not exist
    yet), a base model is trained from scratch with a grid search and both
    the model and its encoders are saved.

    Args:
        data: DataFrame containing the columns in ``_SELECTED_COLUMNS``.

    Returns:
        Tuple ``(accuracy, classification_report_text, message)`` where
        *message* says which of the two paths ran.
    """
    try:
        return _finetune_existing_model(data)
    except Exception as exc:
        # Deliberate fallback, but report the cause instead of hiding it.
        print("fine-tuning skipped, training base model instead:", repr(exc))
        return _train_base_model(data)
127
+
128
@app.get("/trigger_the_data_fecher")
async def your_continuous_function(page: str,paginate: str):
    """Fetch one page of orders from the report API and train the model on it.

    Args:
        page: Page number, forwarded as the ``page`` query parameter.
        paginate: Page size, forwarded as the ``per_page`` query parameter.

    Returns:
        Dict with the training message, the page number, the number of rows
        fetched, the accuracy and the classification report text. When the
        page holds no rows, training is skipped and the two metric fields
        are ``None``.
    """
    print("data fetcher running.....")

    url = "https://report.transexpress.lk/api/orders/delivery-success-rate/return-to-client-orders"
    # The original URL embedded "{page}"/"{paginate}" in a plain (non-f)
    # string, so the literal placeholders were sent; pass real values instead.
    query = {"page": page, "per_page": paginate}
    headers = {
        # NOTE(review): session cookie is hard-coded in source - consider
        # moving it to configuration / an environment variable.
        'Cookie': 'development_trans_express_session=NaFDGzh5WQCFwiortxA6WEFuBjsAG9GHIQrbKZ8B'
    }

    # NOTE(review): this blocking HTTP call and the training below run inside
    # an async handler and will hold the event loop while they execute.
    response = requests.get(url, params=query, headers=headers, timeout=60)
    response.raise_for_status()

    json_response = response.json()
    # Extracting 'data' for conversion
    data = json_response["return_to_client_orders"]['data']
    data_count = len(data)

    if not data:
        # An empty page would otherwise fail on the 'status_name' lookup.
        return {"message":"no data returned for this page","page_number":page,"data_count":data_count,"accuracy":None,"classification_rep":None}

    df = pd.json_normalize(data)

    # Collapse the statuses into the two classes the model is trained on.
    df['status_name'] = df['status_name'].replace('Partially Delivered', 'Delivered')
    df['status_name'] = df['status_name'].replace('Received by Client', 'Returned to Client')

    print("data collected from page : "+page)

    accuracy,classification_rep,message = train_the_model(df)

    return {"message":message,"page_number":page,"data_count":data_count,"accuracy":accuracy,"classification_rep":classification_rep}
164
+
165
+
166
+
167
+
168
@app.get("/get_latest_model_updated_time")
async def model_updated_time():
    """Report the modification times of the saved encoders and model files.

    Returns:
        Dict with the encoders file mtime under "base model created time "
        and the model file mtime under "last model updated time", or the
        fallback message when either file cannot be stat'ed.
    """
    try:
        m_time_encoder = os.path.getmtime('transexpress_encoders.joblib')
        m_time_model = os.path.getmtime('transexpress_xgb_model.joblib')
    except OSError:
        # NOTE(review): this is a set literal, not a dict; kept as-is so the
        # response shape does not change - confirm whether a
        # {"message": ...} dict was intended.
        return {"no model found so first trained the model using data fecther"}
    else:
        return {"base model created time ":datetime.datetime.fromtimestamp(m_time_encoder),
                "last model updated time":datetime.datetime.fromtimestamp(m_time_model)}
177
+
178
+
179
+
180
+
181
+
182
  # Endpoint for making predictions
183
  @app.post("/predict")
184
  def predict(
185
  customer_name: str,
186
  customer_address: str,
187
  customer_phone: str,
 
188
  weight: int,
189
  cod: int,
190
  pickup_address: str,
191
+ client_number:str,
192
+ destination_city:str
193
+ ):
194
 
195
+
196
+ try:
197
+ # Load your trained model and encoders
198
+ xgb_model = load('transexpress_xgb_model.joblib')
199
+ encoders = load('transexpress_encoders.joblib')
200
+ except:
201
+ return {"no model found so first trained the model using data fecther"}
202
+
203
+
204
+ # Function to handle unseen labels during encoding
205
+ def safe_transform(encoder, column):
206
+ classes = encoder.classes_
207
+ return [encoder.transform([x])[0] if x in classes else -1 for x in column]
208
+
209
+ # Convert input data to DataFrame
210
  input_data = {
211
  'customer_name': customer_name,
212
  'customer_address': customer_address,
213
  'customer_phone_no': customer_phone,
 
214
  'weight': weight,
215
  'cod': cod,
216
  'pickup_address':pickup_address,
217
+ 'client_number':client_number,
218
+ 'destination_city':destination_city
219
  }
220
  input_df = pd.DataFrame([input_data])
221
 
 
231
  # Output
232
  predicted_status = "Unknown" if pred[0] == -1 else encoders['status_name'].inverse_transform([pred])[0]
233
  probability = pred_proba[0][pred[0]] * 100 if pred[0] != -1 else "Unknown"
 
234
 
235
  if predicted_status == "Returned to Client":
236
  probability = 100 - probability
237
 
238
+ return {"Probability": round(probability,2)}