Arafath10 committed on
Commit
64c5058
1 Parent(s): 5ca8728

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +38 -93
main.py CHANGED
@@ -26,66 +26,11 @@ app.add_middleware(
26
 
27
 
28
 
29
- def train_the_model(data,page):
30
- if str(page) == "2":
31
 
32
- new_data = data
33
- encoders = load('transexpress_encoders.joblib')
34
- xgb_model = load('transexpress_xgb_model.joblib')
35
-
36
- # Selecting and filling missing data
37
- selected_columns = ['customer_name', 'customer_address', 'customer_phone_no',
38
- 'weight', 'cod', 'pickup_address', 'client_number', 'destination_city',
39
- 'status_name']
40
-
41
- new_data_filled = new_data[selected_columns].fillna('Missing')
42
- for col, encoder in encoders.items():
43
- if col in new_data_filled.columns:
44
- unseen_categories = set(new_data_filled[col]) - set(encoder.classes_)
45
- if unseen_categories:
46
- for category in unseen_categories:
47
- encoder.classes_ = np.append(encoder.classes_, category)
48
- new_data_filled[col] = encoder.transform(new_data_filled[col])
49
- else:
50
- new_data_filled[col] = encoder.transform(new_data_filled[col])
51
- X_new = new_data_filled.drop('status_name', axis=1)
52
- y_new = new_data_filled['status_name']
53
- X_train, X_test, y_train, y_test = train_test_split(X_new, y_new, test_size=0.2, random_state=42)
54
-
55
-
56
- # Setup the hyperparameter grid to search
57
- param_grid = {
58
- 'max_depth': [3, 4, 5],
59
- 'learning_rate': [0.01, 0.1, 0.4],
60
- 'n_estimators': [100, 200, 300],
61
- 'subsample': [0.8, 0.9, 1],
62
- 'colsample_bytree': [0.3, 0.7]
63
- }
64
-
65
- # Initialize the classifier
66
- #xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
67
-
68
- # Setup GridSearchCV
69
- grid_search = GridSearchCV(xgb_model, param_grid, cv=40, n_jobs=-1, scoring='accuracy')
70
-
71
- # Fit the grid search to the data
72
- grid_search.fit(X_train, y_train)
73
-
74
- dump(grid_search, 'transexpress_xgb_model.joblib')
75
-
76
- # Making predictions and evaluating the model
77
- y_pred = grid_search.predict(X_test)
78
- accuracy = accuracy_score(y_test, y_pred)
79
- classification_rep = classification_report(y_test, y_pred)
80
-
81
- # Returning the results
82
- return accuracy, classification_rep, "Model finetuned with new data."
83
-
84
-
85
- if str(page) == "1":
86
 
87
- data = data
88
-
89
  # Select columns
90
  selected_columns = ['customer_name', 'customer_address', 'customer_phone_no',
91
  'weight','cod','pickup_address','client_number','destination_city',
@@ -104,35 +49,27 @@ def train_the_model(data,page):
104
  y = data_filled['status_name']
105
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
106
 
107
- # Setup the hyperparameter grid to search
108
- param_grid = {
109
- 'max_depth': [3, 4, 5],
110
- 'learning_rate': [0.01, 0.1, 0.4],
111
- 'n_estimators': [100, 200, 300],
112
- 'subsample': [0.8, 0.9, 1],
113
- 'colsample_bytree': [0.3, 0.7]
 
 
114
  }
115
 
116
- # Initialize the classifier
117
- xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
118
-
119
- # Setup GridSearchCV
120
- grid_search = GridSearchCV(xgb, param_grid, cv=40, n_jobs=-1, scoring='accuracy')
121
-
122
- # Fit the grid search to the data
123
- grid_search.fit(X_train, y_train)
124
-
125
- # Get the best parameters
126
- best_params = grid_search.best_params_
127
- print("Best parameters:", best_params)
128
 
129
- # Train the model with best parameters
130
- best_xgb = XGBClassifier(**best_params, use_label_encoder=False, eval_metric='logloss')
131
- best_xgb.fit(X_train, y_train)
132
 
133
  # Predict on the test set
134
- y_pred = best_xgb.predict(X_test)
135
- y_pred_proba = best_xgb.predict_proba(X_test)
136
 
137
  # Evaluate the model
138
  accuracy = accuracy_score(y_test, y_pred)
@@ -140,23 +77,17 @@ def train_the_model(data,page):
140
 
141
  # Save the model
142
  model_filename = 'transexpress_xgb_model.joblib'
143
- dump(best_xgb, model_filename)
144
 
145
  # Save the encoders
146
  encoders_filename = 'transexpress_encoders.joblib'
147
  dump(encoders, encoders_filename)
148
 
149
- return accuracy,classification_rep,"base Model trained"
150
 
151
  @app.get("/trigger_the_data_fecher")
152
  async def your_continuous_function(page: str,paginate: str):
153
 
154
- if str(page) == "2":
155
- df = pd.read_csv("transexpress_v10.csv")
156
- print("file readed")
157
- accuracy,classification_rep,message = train_the_model(df,page)
158
-
159
- return {"message":message,"page_number":page,"data_count":data_count,"accuracy":accuracy,"classification_rep":classification_rep}
160
 
161
  print("data fetcher running.....")
162
 
@@ -188,9 +119,22 @@ async def your_continuous_function(page: str,paginate: str):
188
 
189
  print("data collected from page : "+page)
190
  #return "done"
191
- #data.to_csv("new.csv")
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
- accuracy,classification_rep,message = train_the_model(df,page)
194
 
195
  return {"message":message,"page_number":page,"data_count":data_count,"accuracy":accuracy,"classification_rep":classification_rep}
196
 
@@ -214,10 +158,11 @@ async def model_updated_time():
214
  # Endpoint for making predictions
215
  @app.post("/predict")
216
  def predict(
 
217
  customer_name: str,
218
  customer_address: str,
219
  customer_phone: str,
220
- weight: int,
221
  cod: int,
222
  pickup_address: str,
223
  client_number:str,
 
26
 
27
 
28
 
29
+ def train_the_model():
 
30
 
31
+ data = pd.read_csv("trainer_data.csv")
32
+ print(data["customer_name"].count())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
 
 
34
  # Select columns
35
  selected_columns = ['customer_name', 'customer_address', 'customer_phone_no',
36
  'weight','cod','pickup_address','client_number','destination_city',
 
49
  y = data_filled['status_name']
50
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
51
 
52
+ # Parameters to use for the model
53
+ params = {
54
+ 'colsample_bytree': 0.3,
55
+ 'learning_rate': 0.6,
56
+ 'max_depth': 8,
57
+ 'n_estimators': 100,
58
+ 'subsample': 0.9,
59
+ 'use_label_encoder': False,
60
+ 'eval_metric': 'logloss'
61
  }
62
 
63
+ # Initialize the classifier with the specified parameters
64
+ xgb = XGBClassifier(**params)
 
 
 
 
 
 
 
 
 
 
65
 
66
+ # Train the model
67
+ xgb.fit(X_train, y_train)
68
+
69
 
70
  # Predict on the test set
71
+ y_pred = xgb.predict(X_test)
72
+ y_pred_proba = xgb.predict_proba(X_test)
73
 
74
  # Evaluate the model
75
  accuracy = accuracy_score(y_test, y_pred)
 
77
 
78
  # Save the model
79
  model_filename = 'transexpress_xgb_model.joblib'
80
+ dump(xgb, model_filename)
81
 
82
  # Save the encoders
83
  encoders_filename = 'transexpress_encoders.joblib'
84
  dump(encoders, encoders_filename)
85
 
86
+ return accuracy,classification_rep,"Model trained with new data"
87
 
88
  @app.get("/trigger_the_data_fecher")
89
  async def your_continuous_function(page: str,paginate: str):
90
 
 
 
 
 
 
 
91
 
92
  print("data fetcher running.....")
93
 
 
119
 
120
  print("data collected from page : "+page)
121
  #return "done"
122
+ try:
123
+ file_path = 'trainer_data.csv' # Replace with your file path
124
+ source_csv = pd.read_csv(file_path)
125
+ new_data = df
126
+ combined_df_final = pd.concat([source_csv,new_data], ignore_index=True)
127
+
128
+ combined_df_final.to_csv("trainer_data.csv")
129
+ print("data added")
130
+ except:
131
+
132
+ df.to_csv("trainer_data.csv")
133
+ print("data created")
134
+
135
+
136
 
137
+ accuracy,classification_rep,message = train_the_model()
138
 
139
  return {"message":message,"page_number":page,"data_count":data_count,"accuracy":accuracy,"classification_rep":classification_rep}
140
 
 
158
  # Endpoint for making predictions
159
  @app.post("/predict")
160
  def predict(
161
+ date : str
162
  customer_name: str,
163
  customer_address: str,
164
  customer_phone: str,
165
+ weight: float,
166
  cod: int,
167
  pickup_address: str,
168
  client_number:str,