Update main.py

main.py CHANGED
@@ -4,20 +4,22 @@ from fastapi.middleware.cors import CORSMiddleware
 import requests
 import pandas as pd
 import json
-import
+import os,datetime
 import pandas as pd
 from sklearn.model_selection import train_test_split, GridSearchCV
 from sklearn.preprocessing import LabelEncoder
-from xgboost import XGBClassifier
 from sklearn.utils import resample
+from xgboost import XGBClassifier
 from sklearn.metrics import accuracy_score, classification_report
 from joblib import dump, load
 import numpy as np
+import requests
+import mysql.connector
+from mysql.connector import Error
 
 
 app = FastAPI()
 
-
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
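The middleware above keeps allow_origins=["*"], which accepts requests from any origin. A minimal sketch of a tighter configuration, assuming the API is only called from one known frontend (the origin URL below is a placeholder, not from this repo):

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://dashboard.example.com"],  # placeholder origin
    allow_credentials=True,
    allow_methods=["GET", "POST"],
    allow_headers=["*"],
)

The credentials flag and trimmed method list are illustrative; the wildcard origin may well be intentional for an internal tool.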
@@ -26,16 +28,20 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-
-@app.get("/train_the_model_new_v2")
-async def train_the_model(Tenant: str):
-    # Load the dataset
-    data = pd.read_csv(f"model/{Tenant}trainer_data_v1.csv")
-    print(data["customer_name"].count())
+@app.get("/trigger_the_model_trainer")
+async def train_the_model():
+    # Load the dataset
+    #file_path = 'model/trainer_data.csv' # Update to the correct file path 'model/trainer_data_new.csv'
+    #data = pd.read_csv(file_path)
+    csv_files = ['model/trainer_data.csv','model/trainer_data2.csv','model/trainer_data3.csv','model/trainer_data4.csv']
+    data_frames = [pd.read_csv(file) for file in csv_files]
+
+    # Step 4: Concatenate all DataFrames into a single DataFrame
+    data = pd.concat(data_frames, ignore_index=True)
+    #data = data.iloc[0:50000]
 
     # Analyze class distribution
-    class_distribution = data['status.name'].value_counts()
-    bf = str(class_distribution)
+    class_distribution = data['status_name'].value_counts()
     print("Class Distribution before balancing:\n", class_distribution)
 
     # Get the size of the largest class to match other classes' sizes
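The comment promises "the size of the largest class" and the oversampling loop below consumes max_class_size, but its assignment sits outside the diff context; presumably it is the maximum of the value counts. A self-contained sketch on a toy frame using the same status_name column (values invented):

import pandas as pd

data = pd.DataFrame({"status_name": ["Delivered"] * 3 + ["Returned to Client"]})

class_distribution = data["status_name"].value_counts()
print("Class Distribution before balancing:\n", class_distribution)

# Assumption: max_class_size is the size of the majority class.
max_class_size = class_distribution.max()
print(max_class_size)  # 3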
@@ -43,7 +49,7 @@ async def train_the_model(Tenant: str):
 
     # Oversampling
     oversampled_data = pd.DataFrame()
-    for class_name, group in data.groupby('status.name'):
+    for class_name, group in data.groupby('status_name'):
         oversampled_group = resample(group,
                                      replace=True,  # Sample with replacement
                                      n_samples=max_class_size,  # to match majority class
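The loop upsamples every class to max_class_size with sklearn.utils.resample (sampling with replacement). A runnable miniature of the same pattern on synthetic data, with the column name taken from the diff:

import pandas as pd
from sklearn.utils import resample

data = pd.DataFrame({
    "status_name": ["Delivered"] * 6 + ["Returned to Client"] * 2,
    "cod": [1, 0, 1, 1, 0, 1, 0, 1],
})
max_class_size = data["status_name"].value_counts().max()

oversampled_data = pd.DataFrame()
for class_name, group in data.groupby("status_name"):
    oversampled_group = resample(
        group,
        replace=True,              # sample with replacement
        n_samples=max_class_size,  # match the majority class
        random_state=42,
    )
    oversampled_data = pd.concat([oversampled_data, oversampled_group], axis=0)

print(oversampled_data["status_name"].value_counts())  # both classes now at 6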
@@ -51,31 +57,36 @@ async def train_the_model(Tenant: str):
     oversampled_data = pd.concat([oversampled_data, oversampled_group], axis=0)
 
     # Verify new class distribution
-    print("Class Distribution after oversampling:\n", oversampled_data['status.name'].value_counts())
+    print("Class Distribution after oversampling:\n", oversampled_data['status_name'].value_counts())
 
+    # Save the balanced dataset if needed
+    #oversampled_data.to_csv('model/trainer_data_balanced.csv', index=False)
+
+    data = pd.read_csv("model/trainer_data_new.csv")
+    print(data["customer_name"].count())
+
+    data = pd.read_csv("model/trainer_data_balanced.csv")
+    print(data["customer_name"].count())
+
     data = oversampled_data
-
-    # Select columns
-    selected_columns = ['customer_name', 'customer_address', '
-                        'cod',
-                        '
+    print(data["customer_name"].count())
+    # Select columns
+    selected_columns = ['customer_name', 'customer_address', 'customer_phone_no',
+                        'weight','cod','pickup_address','client_number','destination_city',
+                        'status_name']
 
     # Handling missing values
     #data_filled = data[selected_columns].fillna('Missing')
     data_filled = data[selected_columns].dropna()
-
-    data_filled['created_at'] = data_filled['created_at'].astype(str)
-    #data_filled = data_filled.drop(columns=['created_at'])
-
-    af = str(oversampled_data['status.name'].value_counts())
+
     # Encoding categorical variables
     encoders = {col: LabelEncoder() for col in selected_columns if data_filled[col].dtype == 'object'}
    for col, encoder in encoders.items():
         data_filled[col] = encoder.fit_transform(data_filled[col])
 
     # Splitting the dataset
-    X = data_filled.drop('status.name', axis=1)
-    y = data_filled['status.name']
+    X = data_filled.drop('status_name', axis=1)
+    y = data_filled['status_name']
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 
     # Parameters to use for the model
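One LabelEncoder is fitted per object column and the whole dict is kept, so the identical mappings can be reapplied at prediction time. A sketch replaying that on a small synthetic frame (column names follow the diff, values are invented):

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

df = pd.DataFrame({
    "customer_name": ["a", "b", "a", "c"] * 3,
    "weight": [1.0, 2.0, 1.5, 3.0] * 3,
    "status_name": ["Delivered", "Returned to Client"] * 6,
})

# One encoder per text column, kept for reuse at predict time.
encoders = {col: LabelEncoder() for col in df.columns if df[col].dtype == "object"}
for col, encoder in encoders.items():
    df[col] = encoder.fit_transform(df[col])

X = df.drop("status_name", axis=1)
y = df["status_name"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(X_train.shape, X_test.shape)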
@@ -115,159 +126,173 @@ async def train_the_model(Tenant: str):
     classification_rep = classification_report(y_test, y_pred)
 
     # Save the model
-    model_filename =
+    model_filename = 'model/transexpress_xgb_model.joblib'
     dump(xgb, model_filename)
 
     # Save the encoders
-    encoders_filename =
+    encoders_filename = 'model/transexpress_encoders.joblib'
     dump(encoders, encoders_filename)
 
-    return accuracy,classification_rep,"Model trained with new data
+    return accuracy,classification_rep,"Model trained with new data"
 
 
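The model and the encoder dict are persisted separately with joblib, and /predict later loads both from the same paths. A minimal round-trip under that assumption, using a throwaway file name rather than the repo's model/ directory:

from joblib import dump, load
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder().fit(["Delivered", "Returned to Client"])

# Persist, then restore; the path here is a scratch file, not the repo's.
dump(encoder, "encoders_demo.joblib")
restored = load("encoders_demo.joblib")
print(list(restored.classes_))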
-@app.get("/trigger_the_data_fecher_for_me")
-async def continuous_function(page: int,paginate: int,Tenant: str):
-    print("data fetcher running.....")
-
-    # Update the payload for each page
-
-    #url = "https://dev3.api.curfox.parallaxtec.com/api/ml/order-list?sort=id&paginate="+str(paginate)+"&page="+str(page)
-    url = "https://v1.api.curfox.com/api/ml/order-list?sort=id&paginate="+str(paginate)+"&page="+str(page)
-
-
-    payload = {}
-    headers = {
-        'Accept': 'application/json',
-        'X-Tenant': Tenant #'royalexpress'
-    }
-
-    response = requests.request("GET", url, headers=headers, data=payload)
-
-    # Sample JSON response
-    json_response = response.json()
-    # Extracting 'data' for conversion
-    data = json_response['data']
-    data_count = len(data)
-
-    df = pd.json_normalize(data)
-
-
-    df = df[df['status.name'].isin(['RETURN TO CLIENT', 'DELIVERED'])]
-    print("data collected from page : "+str(page))
-    #data.to_csv("new.csv")
-
-    try:
-        file_path = f'model/{Tenant}trainer_data_v1.csv' # Replace with your file path
-        source_csv = pd.read_csv(file_path)
-        new_data = df
-        combined_df_final = pd.concat([source_csv,new_data], ignore_index=True)
-
-        combined_df_final.to_csv(f"model/{Tenant}trainer_data_v1.csv")
-        print("data added")
-        message = "data added"
-    except:
-
-        df.to_csv(f"model/{Tenant}trainer_data_v1.csv")
-        print("data created")
-        message = "data created"
-
-    return {"message":message,"page_number":page,"data_count":data_count,'X-Tenant': Tenant}
-
-@app.get("/trigger_the_data_fecher")
-async def your_continuous_function(page: int,paginate: int,Tenant: str):
+@app.get("/trigger_the_data_fecher")
+async def get_data(page: str,paginate: str):
     print("data fetcher running.....")
 
+    # Initialize an empty DataFrame to store the combined data
+    combined_df = pd.DataFrame()
 
     # Update the payload for each page
-    #url = "https://dev3.api.curfox.parallaxtec.com/api/ml/order-list?sort=id&paginate="+str(paginate)+"&page="+str(page)
-    url = "https://v1.api.curfox.com/api/ml/order-list?sort=id&paginate="+str(paginate)+"&page="+str(page)
-
+    url = "https://report.transexpress.lk/api/orders/delivery-success-rate/return-to-client-orders?page="+page+"&per_page="+paginate
 
     payload = {}
     headers = {
-
-
-    }
+        'Cookie': 'development_trans_express_session=NaFDGzh5WQCFwiortxA6WEFuBjsAG9GHIQrbKZ8B'
+    }
 
     response = requests.request("GET", url, headers=headers, data=payload)
 
     # Sample JSON response
     json_response = response.json()
     # Extracting 'data' for conversion
-    data = json_response['data']
+    data = json_response["return_to_client_orders"]['data']
+
     data_count = len(data)
 
     df = pd.json_normalize(data)
 
 
-    df = df[df['status.name'].isin(['RETURN TO CLIENT', 'DELIVERED'])]
-
-
-
+    df['status_name'] = df['status_name'].replace('Partially Delivered', 'Delivered')
+    df['status_name'] = df['status_name'].replace('Received by Client', 'Returned to Client')
+
+    print("data collected from page : "+page)
+    #return "done"
     try:
-        file_path =
+        file_path = 'model/trainer_data5.csv' # Replace with your file path
         source_csv = pd.read_csv(file_path)
         new_data = df
         combined_df_final = pd.concat([source_csv,new_data], ignore_index=True)
 
-        combined_df_final.to_csv(
+        combined_df_final.to_csv("model/trainer_data5.csv")
         print("data added")
     except:
 
-        df.to_csv(
+        df.to_csv("model/trainer_data5.csv")
         print("data created")
-
-    return {"message":"done","page_number":page,"data_count":data_count,'X-Tenant': Tenant}
 
+    print({"page_number":page,"data_count":data_count})
+    return {"page_number":page,"data_count":data_count}
 
+@app.get("/get_module_versions")
+async def get_versions():
+    try:
+        from pip._internal.operations import freeze
+    except ImportError: # pip < 10.0
+        from pip.operations import freeze
 
+    pkgs = freeze.freeze()
+    for pkg in pkgs:
+        print(pkg)
+    return pkgs
+
 
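Each call to /trigger_the_data_fecher pulls a single page, so building the training CSV means invoking it once per page. A sketch of a client-side driver loop under that assumption; the session cookie is a placeholder, since the one in the diff is environment-specific and will expire:

import pandas as pd
import requests

BASE_URL = "https://report.transexpress.lk/api/orders/delivery-success-rate/return-to-client-orders"
HEADERS = {"Cookie": "development_trans_express_session=<fresh-session-token>"}  # placeholder

frames = []
for page in range(1, 4):  # first three pages, purely for illustration
    resp = requests.get(BASE_URL,
                        params={"page": page, "per_page": 500},
                        headers=HEADERS, timeout=30)
    resp.raise_for_status()
    rows = resp.json()["return_to_client_orders"]["data"]
    if not rows:
        break  # ran past the last page
    frames.append(pd.json_normalize(rows))

combined = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
print(len(combined), "rows collected")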
 @app.get("/get_latest_model_updated_time")
-async def model_updated_time(
-    import multiprocessing
-
-    # Get the number of available CPU cores
-    available_cores = multiprocessing.cpu_count()
+async def model_updated_time():
     try:
-
-
-
-
-        "base model created time ":datetime.datetime.fromtimestamp(m_time_encoder),
+        file_size = os.path.getsize("model/transexpress_xgb_model.joblib")
+        m_time_encoder = os.path.getmtime('model/transexpress_encoders.joblib')
+        m_time_model = os.path.getmtime('model/transexpress_xgb_model.joblib')
+        return {"base model created time ":datetime.datetime.fromtimestamp(m_time_encoder),
                "last model updated time":datetime.datetime.fromtimestamp(m_time_model),
-        "
+               "The size of the file is bytes":file_size
         }
     except:
         return {"no model found so first trained the model using data fecther"}
 
 
 
+# Database connection parameters
+DB_HOST = 'trans-prod-clone-staging.mysql.database.azure.com'
+DB_PORT = 3306
+DB_DATABASE = 'defaultdb'
+DB_USERNAME = 'wwwdata'
+DB_PASSWORD = 'fcLa8F3sxgNYQ$K@%'
+# Connect to the database
 
+#calling this function for each request
+def fetch_customer_data(phone_number):
+    #local connection
+    connection = mysql.connector.connect(
+        host=DB_HOST,
+        port=DB_PORT,
+        database=DB_DATABASE,
+        user=DB_USERNAME,
+        password=DB_PASSWORD
+    )
+    #try:
+    if connection.is_connected():
+        print("Connected to the database")
+
+        # SQL query
+        query = """
+            SELECT
+                orders.customer_name AS customer_name,
+                orders.address AS customer_address,
+                orders.phone_no AS customer_phone_no,
+                primary_statuses.name AS status_name
+            FROM
+                orders
+            INNER JOIN
+                statuses ON orders.status_id = statuses.id
+            INNER JOIN
+                primary_statuses ON statuses.name = primary_statuses.key
+            WHERE orders.phone_no LIKE %s
+        """
+
+        # Execute the query
+        cursor = connection.cursor(dictionary=True)
+        cursor.execute(query, (f"%{phone_number}%",))
+
+        # Fetch results
+        results = cursor.fetchall()
+        #print("Results:", results)
+        #close conection
+        #if connection.is_connected():
+        cursor.close()
+        connection.close()
+        print("Database connection closed")
+        return results
+
+    # except Error as e:
+    #     print(f"Error: {e}")
+    #     #close conection
+    #     #if connection.is_connected():
+    #     cursor.close()
+    #     connection.close()
+    #     print("Database connection closed")
 
 
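fetch_customer_data above embeds credentials in module constants and only closes the connection on the success path. A sketch of the same parameterized lookup with credentials drawn from environment variables and cleanup in a finally block; the env var names are assumptions, while the query itself is taken from the diff:

import os
import mysql.connector

connection = mysql.connector.connect(
    host=os.environ["DB_HOST"],          # assumed env var names
    port=int(os.environ.get("DB_PORT", "3306")),
    database=os.environ["DB_DATABASE"],
    user=os.environ["DB_USERNAME"],
    password=os.environ["DB_PASSWORD"],
)
cursor = connection.cursor(dictionary=True)
try:
    # %s keeps the phone number out of the SQL text (no string concatenation).
    cursor.execute(
        """
        SELECT orders.customer_name, primary_statuses.name AS status_name
        FROM orders
        INNER JOIN statuses ON orders.status_id = statuses.id
        INNER JOIN primary_statuses ON statuses.name = primary_statuses.key
        WHERE orders.phone_no LIKE %s
        """,
        ("%0773224384%",),
    )
    results = cursor.fetchall()
finally:
    cursor.close()      # runs even when the query raises
    connection.close()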
-# Endpoint for making predictions
 
+# Endpoint for making predictions
 @app.post("/predict")
-def predict(
-
+async def predict(
+    date : str,
     customer_name: str,
     customer_address: str,
     customer_phone: str,
-
-
-    origin_city_name: str,
-    destination_city_name: str,
-    created_at: str,
-    customer_email: str,
+    weight: float,
+    cod: int,
     pickup_address: str,
-
+    client_number:str,
+    destination_city:str
 ):
 
+
     try:
         # Load your trained model and encoders
-        xgb_model = load(
-        encoders = load(
+        xgb_model = load('model/transexpress_xgb_model.joblib')
+        encoders = load('model/transexpress_encoders.joblib')
     except:
         return {"no model found so first trained the model using data fecther"}
 
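Note that return {"no model found so first trained the model using data fecther"} is a Python set literal, not a dict, so FastAPI serializes it as a JSON array with a 200 status. A sketch of the more conventional failure path, shown as a hypothetical standalone endpoint reusing the diff's model paths:

from fastapi import FastAPI, HTTPException
from joblib import load

app = FastAPI()

@app.post("/predict")
async def predict(customer_name: str):
    try:
        xgb_model = load('model/transexpress_xgb_model.joblib')
        encoders = load('model/transexpress_encoders.joblib')
    except FileNotFoundError:
        # 404 with a JSON body instead of a bare set literal
        raise HTTPException(status_code=404,
                            detail="No model found; run the data fetcher and trainer first")
    return {"model_loaded": True}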
@@ -276,26 +301,20 @@ def predict(
     def safe_transform(encoder, column):
         classes = encoder.classes_
         return [encoder.transform([x])[0] if x in classes else -1 for x in column]
-    # Function to handle unseen labels during encoding
-    def safe_transform(encoder, column):
-        classes = encoder.classes_
-        return [encoder.transform([x])[0] if x in classes else -1 for x in column]
-
 
-
+    # Convert input data to DataFrame
     input_data = {
         'customer_name': customer_name,
         'customer_address': customer_address,
-        '
+        'customer_phone_no': customer_phone,
+        'weight': float(weight),
         'cod': int(cod),
-        '
-        '
-        'destination_city
-        'created_at':created_at
+        'pickup_address':pickup_address,
+        'client_number':client_number,
+        'destination_city':destination_city
     }
     input_df = pd.DataFrame([input_data])
 
-
     # Encode categorical variables using the same encoders used during training
     for col in input_df.columns:
         if col in encoders:
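safe_transform exists because LabelEncoder.transform raises on labels it never saw during fit; the helper returns -1 instead. A tiny demonstration with invented city names:

from sklearn.preprocessing import LabelEncoder

def safe_transform(encoder, column):
    classes = encoder.classes_
    return [encoder.transform([x])[0] if x in classes else -1 for x in column]

encoder = LabelEncoder().fit(["Colombo", "Kandy"])
print(safe_transform(encoder, ["Kandy", "Galle"]))  # [1, -1]; "Galle" was never seen

The duplicate definition removed by this hunk was dead code; one copy of the helper is enough.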
@@ -304,12 +323,69 @@ def predict(
     # Predict and obtain probabilities
     pred = xgb_model.predict(input_df)
     pred_proba = xgb_model.predict_proba(input_df)
+
+    import numpy as np
+    from urllib.parse import unquote
+    def extract_phone_numbers(customer_phone):
+        # Decode URL-encoded phone numbers
+        decoded_phone = unquote(customer_phone)
+        # Split into a list of phone numbers
+        phone_numbers = [phone.strip() for phone in decoded_phone.split('/')]
+        # Handle case where there is a single phone number
+        if len(phone_numbers) == 1 and phone_numbers[0]:
+            return phone_numbers
+        elif len(phone_numbers) == 0:
+            return []
+        return phone_numbers
+
+
+    def calculate_delivery_factor(phone_number):
+        # Replace with the desired customer name and phone number
+
+        #customer_phone_no = '0773224384'
+        json = fetch_customer_data(phone_number)
+        data = json
+        #print(url,data)
+        # Filter only relevant status names
+        valid_statuses = ['Failed to Deliver', 'Delivered', 'Returned to Client']
+        relevant_orders = [order for order in data if order['status_name'] in valid_statuses]
+
+        if not relevant_orders:
+            base_probability = 0.50
+        else:
+            delivered_count = sum(1 for order in relevant_orders if order['status_name'] == 'Delivered')
+            total_orders_count = len(relevant_orders)
+
+            base_probability = delivered_count / total_orders_count
+            base_probability = max(0.05, min(base_probability, 0.95))
+
+        # Add a narrower random component
+        random_component = np.random.uniform(-0.05, 0.05)
+        adjusted_probability = base_probability + random_component
+
+        return adjusted_probability
+    try:
+
+        print(customer_phone)
+        phone_numbers = extract_phone_numbers(customer_phone)
+        print(phone_numbers, "api calling ......")
+        probability = calculate_delivery_factor(phone_numbers[0])
+        probability = round((probability * 100),2)
+        #probability = f"{probability:.2f}" probability = f"{float(probability):.2f}"
 
+        print(f"new model probability: {probability}")
+        predicted_status = "delivered"
+
     # Output
-
-
+    except Exception as e:
+        print(f"Error: {e}")
+        predicted_status = "Unknown" if pred[0] == -1 else encoders['status_name'].inverse_transform([pred])[0]
+        probability = pred_proba[0][pred[0]] * 100 if pred[0] != -1 else "Unknown"
+        print(str(predicted_status),probability)
+        if probability>98:
+            probability = probability-1
 
-    if predicted_status == "RETURN TO CLIENT":
-        probability = 100 - probability
+        if predicted_status == "Returned to Client":
+            probability = 100 - probability
 
-    return {"
+    return {"Probability": round(probability,2),"predicted_status":predicted_status}
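When the phone-history lookup fails, the except branch falls back to the XGBoost model: predict yields a class index, predict_proba supplies that class's probability, and inverse_transform recovers the status string. A self-contained replay of that fallback logic on synthetic data (feature names, sizes, and values are all invented):

import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier

# Tiny synthetic stand-in for the encoded order features.
rng = np.random.default_rng(0)
X = pd.DataFrame({"cod": rng.integers(0, 2, 40), "weight": rng.random(40)})
status_encoder = LabelEncoder().fit(["Delivered", "Returned to Client"])
y = rng.integers(0, 2, 40)  # encoded status labels

model = XGBClassifier(n_estimators=10).fit(X, y)

pred = model.predict(X.iloc[[0]])
pred_proba = model.predict_proba(X.iloc[[0]])

# Same index dance as the diff: the predicted class picks its own probability.
predicted_status = status_encoder.inverse_transform([pred[0]])[0]
probability = pred_proba[0][pred[0]] * 100
print(predicted_status, round(float(probability), 2))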