uservipin committed on
Commit
b6b9d98
1 Parent(s): bfaaca4

updating classification module

Browse files

The classification module can now handle both categorical and numerical data types by default.

__pycache__/classification.cpython-310.pyc CHANGED
Binary files a/__pycache__/classification.cpython-310.pyc and b/__pycache__/classification.cpython-310.pyc differ
 
__pycache__/resume.cpython-310.pyc CHANGED
Binary files a/__pycache__/resume.cpython-310.pyc and b/__pycache__/resume.cpython-310.pyc differ
 
app.py CHANGED
@@ -2,24 +2,37 @@ from classification import ClassificationModels
2
  from regression import RegressionModels
3
  from resume import Resume
4
 
 
 
 
 
 
 
5
  import pandas as pd
6
  import warnings
7
  import streamlit as st
8
- warnings.filterwarnings("ignore")
9
  import uuid
10
  import time
11
  import os
12
  import io
13
  import pathlib
14
  import textwrap
 
15
  import google.generativeai as genai
16
  from dotenv import load_dotenv
17
  from PIL import Image
 
 
 
 
 
18
 
19
  load_dotenv() # take environment variables from .env.
20
  os.getenv("GOOGLE_API_KEY")
21
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
22
 
 
 
23
  ## Function to load OpenAI model and get respones
24
  model_chat = genai.GenerativeModel('gemini-pro')
25
  chat = model_chat.start_chat(history=[])
@@ -39,7 +52,7 @@ def get_gemini_response_vision(input,image):
39
  else:
40
  response = model_vision.generate_content(image)
41
  return response.text
42
-
43
  def gemini_model():
44
  ##initialize our streamlit app
45
  # st.set_page_config(page_title="Q&A Demo")
@@ -55,175 +68,10 @@ def gemini_model():
55
  print("_"*80)
56
 
57
  # st.write(chat.history)
58
- # data cleaning: https://bank-performance.streamlit.app/
59
- # https://docs.streamlit.io/library/api-reference/layout
60
-
61
 
62
  # Define function for each page
63
- # def classification():
64
- # st.title("Home Page")
65
- # st.write("Welcome to the Home Page")
66
-
67
- def regressor():
68
- EDA, train, test = st.tabs(['EDA/Transformation','Train','Test'])
69
-
70
- with train:
71
- st.title("Regression / Train data")
72
- spectra = st.file_uploader("**Upload file**", type={"csv", "txt"})
73
-
74
- if spectra is not None:
75
- spectra_df = pd.read_csv(spectra)
76
-
77
- st.write(spectra_df.head(5))
78
- # st.write("Headers", spectra_df.columns.tolist())
79
- st.write("**Total Rows**", spectra_df.shape[0])
80
-
81
- st.divider()
82
 
83
- option = st.text_input("**Select Output Column**:")
84
- st.divider()
85
-
86
- if option:
87
- st.write("**You have selected output column**: ", option)
88
-
89
- y = spectra_df[option]
90
- X= spectra_df.drop(option, axis=1)
91
-
92
- # Define the columns with your content
93
- col1, col2 = st.columns([4,1], gap="small")
94
-
95
- # Add content to col1
96
- with col1:
97
- st.write("Train data excluding output")
98
- st.write(X.head(5))
99
-
100
- # Add content to col2
101
- with col2:
102
- st.write("Output")
103
- st.write(y.head(5))
104
-
105
- st.divider()
106
-
107
- # Select models
108
- # models_list = [
109
- # 'Linear Regression', 'Polynomial Regression', 'Ridge Regression',
110
- # 'Lasso Regression', 'ElasticNet Regression', 'Logistic Regression',
111
- # 'Decision Tree Regression', 'Random Forest Regression',
112
- # 'Gradient Boosting Regression', 'Support Vector Regression (SVR)',
113
- # 'XGBoost', 'LightGBM'
114
- # ]
115
-
116
- models_list = [
117
- 'Linear Regression',
118
- 'Polynomial Regression',
119
- 'Ridge Regression',
120
- 'Lasso Regression',
121
- 'ElasticNet Regression',
122
- 'Logistic Regression',
123
- 'Decision Tree Regression',
124
- 'Random Forest Regression',
125
- 'Gradient Boosting Regression',
126
- 'Support Vector Regression (SVR)',
127
- 'XGBoost',
128
- 'LightGBM'
129
- ]
130
-
131
- selected_models = st.multiselect('Select Regression Models', models_list)
132
-
133
- if selected_models:
134
- # Initialize RegressionModels class
135
- models = RegressionModels()
136
-
137
- # Add data
138
- models.add_data(X, y)
139
-
140
- # Split data into training and testing sets
141
- models.split_data()
142
-
143
- # Train and evaluate selected models
144
- for model_name in selected_models:
145
- st.subheader(f"Model: {model_name}")
146
- models.fit(model_name)
147
- y_pred = models.train(model_name)
148
- mse, r2 = models.evaluate(model_name)
149
- st.write(f"MSE: {mse}")
150
- st.write(f"R-squared: {r2}")
151
-
152
- def NLP():
153
- Gemini_Chat,Gemini_Vision, Bert, = st.tabs(['Gemini-Chat','Gemini-Vision','Bert'])
154
-
155
- with Gemini_Chat:
156
- st.title("Chat with Gemini Pro")
157
- gemini_model()
158
-
159
- with Gemini_Vision:
160
- #initialize our streamlit app
161
- #st.set_page_config(page_title="Gemini Image Demo")
162
- st.header("Gemini Application")
163
- input=st.text_input("Input Prompt: ",key="input_prompt")
164
- uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
165
- image=""
166
-
167
- if uploaded_file is not None:
168
- image = Image.open(uploaded_file)
169
- #image = Image.open(io.BytesIO(uploaded_file.read()))
170
-
171
- st.image(image, caption="Uploaded Image.", use_column_width=True)
172
-
173
- submit=st.button("Tell me about the image")
174
- ## If ask button is clicked
175
- if submit:
176
- response=get_gemini_response_vision(input,image)
177
- st.subheader("The Response is")
178
- st.write(response)
179
-
180
- with Bert:
181
- st.title(" Bert model will available soon")
182
-
183
- def Voice():
184
- st.title("Home Page")
185
- st.write("Welcome to the Home Page")
186
-
187
- def Video():
188
- st.title("Home Page")
189
- st.write("Welcome to the Home Page")
190
-
191
- def LLMs():
192
- st.title("About Page")
193
- st.write("This is the About Page")
194
-
195
- def AI():
196
- st.title("Need to add models")
197
- #st.write("This is the About AI")
198
-
199
- def resume():
200
- st.title("Resume")
201
- st.write("")
202
- About, Work_Experience,Skills_Tools, Education_Certification = st.tabs(["About", "Work Experience","Skills & Tools", "Education & Certificates"])
203
-
204
- with About:
205
- Resume().display_information()
206
-
207
- with Work_Experience:
208
- Resume().display_work_experience()
209
-
210
- with Skills_Tools:
211
- Resume().skills_tools()
212
-
213
- with Education_Certification:
214
- Resume().display_education_certificate()
215
-
216
-
217
-
218
- # Main function to run the app
219
- def main():
220
-
221
- st.sidebar.title("Deep Learning/ Data Science/ AI Models")
222
- # page_options = ["Classification", "Regressor", "NLP", "Image", "Voice", "Video", "LLMs"]
223
- page_options = ["NLP","AI","Classification", "Regressor","Deep Learning", "Resume"]
224
- choice = st.sidebar.radio("Select", page_options)
225
-
226
- if choice == "Classification":
227
  train, test = st.tabs(['Train','Test'])
228
 
229
  with train:
@@ -245,8 +93,8 @@ def main():
245
  if option:
246
  st.write("**You have selected output column**: ", option)
247
 
248
- y = spectra_df[option]
249
  X= spectra_df.drop(option, axis=1)
 
250
 
251
  # Define the columns with your content
252
  col1, col2 = st.columns([4,1], gap="small")
@@ -288,10 +136,9 @@ def main():
288
  # Execute further code based on selected models
289
  if selected_models:
290
  # st.write("Selected Models:", selected_models)
291
-
292
  # Toggle to add hyperparameters
293
  add_hyperparameters = st.toggle("Add Hyperparameters")
294
-
295
  # If hyperparameters should be added
296
  if add_hyperparameters:
297
  num_models = len(selected_models)
@@ -380,7 +227,10 @@ def main():
380
  # for model_name in model_hyperparameters
381
 
382
  if models == "Naive Bayes Classifier":
 
 
383
  naive_bayes_model = clf.naive_bayes_classifier(model_hyperparameters)
 
384
  naive_bayes_accuracy = clf.evaluate_model(naive_bayes_model)
385
  # naive_bayes_classification_report = clf.evaluate_classification_report(naive_bayes_model)
386
  # st.write("Naive Bayes Accuracy:", naive_bayes_accuracy)
@@ -455,8 +305,8 @@ def main():
455
 
456
  if spectra_1 is not None:
457
  spectra_df1 = pd.read_csv(spectra_1)
458
- Actual = spectra_df1['Disease']
459
- spectra_df1 = spectra_df1.drop(columns=['Disease'])
460
  st.write(spectra_df1.head(5))
461
  st.divider()
462
 
@@ -474,42 +324,41 @@ def main():
474
  if max_key == "Naive Bayes Classifier":
475
  # naive_bayes_model = clf.naive_bayes_classifier(model_hyperparameters)
476
  naive_bayes_model =naive_bayes_model.predict()
477
- st.write("Naive Bayes Model:", naive_bayes_model)
 
 
478
 
479
  if max_key == "Logistic Regression":
480
- st.write("Logistic Regression Model Hyperparameter:", model_hyperparameters)
481
  logistic_regression_model_ = logistic_regression_model.predict(X)
482
-
483
  X['Predict'] = logistic_regression_model_
484
- X['Actual'] = Actual
485
  st.write("Output : ", X)
486
-
487
- logistic_regression_accuracy = clf.evaluate_model(logistic_regression_model)
488
- # logistic_regression_classification_report = clf.evaluate_classification_report(logistic_regression_model)
489
- st.write("Logistic Regression Accuracy:", logistic_regression_accuracy)
490
- # accuracy_dict[models] = logistic_regression_accuracy
491
 
492
  if max_key == "Decision Tree":
493
  decision_tree_model_ = decision_tree_model.predict(X)
494
  X['Predict'] = decision_tree_model_
495
- X['Actual'] = Actual
496
- st.write("Output : ", X)
497
 
498
  if max_key == "Random Forests":
499
  random_forests_model = random_forests_model.predict(X)
500
- st.write("Random Forests Model:", random_forests_model)
 
501
 
502
  if max_key == "SVM":
503
  svm_model = svm_model.predict(X)
504
- st.write("Support Vector Machines Model:", svm_model)
 
505
 
506
  if max_key == "KNN":
507
  knn_model = knn_model.predict(X)
508
- st.write("K-Nearest Neighbors Model:", knn_model)
 
509
 
510
  if max_key == "K- Means Clustering":
511
  kmeans_model =kmeans_model.predict(X)
512
- st.write("K-Means Clustering Model:", kmeans_model)
 
513
 
514
  st.divider()
515
 
@@ -517,28 +366,178 @@ def main():
517
  st.download_button(
518
  label="Download data as CSV",
519
  data=data_frame,
520
- file_name='large_df.csv',
521
  mime='text/csv',
522
  )
523
 
524
  st.divider()
525
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
526
  elif choice == "Regressor":
527
  regressor()
528
- elif choice == "NLP":
529
  NLP()
530
 
531
- if choice == "Image":
532
- Image()
533
-
534
- if choice == "Voice":
535
- Voice()
536
 
537
- if choice == "AI":
538
- AI()
539
-
540
- if choice == "LLMs":
541
- LLMs()
542
  if choice == 'Resume':
543
  resume()
544
 
 
2
  from regression import RegressionModels
3
  from resume import Resume
4
 
5
+ from sklearn.impute import SimpleImputer
6
+ from sklearn.pipeline import Pipeline
7
+ from sklearn.compose import ColumnTransformer
8
+ from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
9
+
10
+
11
  import pandas as pd
12
  import warnings
13
  import streamlit as st
 
14
  import uuid
15
  import time
16
  import os
17
  import io
18
  import pathlib
19
  import textwrap
20
+
21
  import google.generativeai as genai
22
  from dotenv import load_dotenv
23
  from PIL import Image
24
+ warnings.filterwarnings("ignore")
25
+
26
+ # data cleaning: https://bank-performance.streamlit.app/
27
+ # https://docs.streamlit.io/library/api-reference/layout
28
+
29
 
30
  load_dotenv() # take environment variables from .env.
31
  os.getenv("GOOGLE_API_KEY")
32
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
33
 
34
+
35
+
36
  ## Function to load OpenAI model and get respones
37
  model_chat = genai.GenerativeModel('gemini-pro')
38
  chat = model_chat.start_chat(history=[])
 
52
  else:
53
  response = model_vision.generate_content(image)
54
  return response.text
55
+
56
  def gemini_model():
57
  ##initialize our streamlit app
58
  # st.set_page_config(page_title="Q&A Demo")
 
68
  print("_"*80)
69
 
70
  # st.write(chat.history)
 
 
 
71
 
72
  # Define function for each page
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
+ def classification():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  train, test = st.tabs(['Train','Test'])
76
 
77
  with train:
 
93
  if option:
94
  st.write("**You have selected output column**: ", option)
95
 
 
96
  X= spectra_df.drop(option, axis=1)
97
+ y = spectra_df[option]
98
 
99
  # Define the columns with your content
100
  col1, col2 = st.columns([4,1], gap="small")
 
136
  # Execute further code based on selected models
137
  if selected_models:
138
  # st.write("Selected Models:", selected_models)
 
139
  # Toggle to add hyperparameters
140
  add_hyperparameters = st.toggle("Add Hyperparameters")
141
+
142
  # If hyperparameters should be added
143
  if add_hyperparameters:
144
  num_models = len(selected_models)
 
227
  # for model_name in model_hyperparameters
228
 
229
  if models == "Naive Bayes Classifier":
230
+ # Pipeline to implement model
231
+
232
  naive_bayes_model = clf.naive_bayes_classifier(model_hyperparameters)
233
+
234
  naive_bayes_accuracy = clf.evaluate_model(naive_bayes_model)
235
  # naive_bayes_classification_report = clf.evaluate_classification_report(naive_bayes_model)
236
  # st.write("Naive Bayes Accuracy:", naive_bayes_accuracy)
 
305
 
306
  if spectra_1 is not None:
307
  spectra_df1 = pd.read_csv(spectra_1)
308
+ # Actual = spectra_df1['Disease']
309
+ #spectra_df1 = spectra_df1.drop(columns=['Disease'])
310
  st.write(spectra_df1.head(5))
311
  st.divider()
312
 
 
324
  if max_key == "Naive Bayes Classifier":
325
  # naive_bayes_model = clf.naive_bayes_classifier(model_hyperparameters)
326
  naive_bayes_model =naive_bayes_model.predict()
327
+ X['Predict'] = naive_bayes_model
328
+ st.write("Output : ", X)
329
+ st.write("Model used for Prediction is: Naive Bayes Model", naive_bayes_model)
330
 
331
  if max_key == "Logistic Regression":
 
332
  logistic_regression_model_ = logistic_regression_model.predict(X)
 
333
  X['Predict'] = logistic_regression_model_
 
334
  st.write("Output : ", X)
335
+ st.write("Model used for Prediction is: Logistic Regression")
 
 
 
 
336
 
337
  if max_key == "Decision Tree":
338
  decision_tree_model_ = decision_tree_model.predict(X)
339
  X['Predict'] = decision_tree_model_
340
+ #X['Actual'] = Actual
341
+ st.write("Model used for Prediction is: Decision Tree ", X)
342
 
343
  if max_key == "Random Forests":
344
  random_forests_model = random_forests_model.predict(X)
345
+ X['Predict'] = random_forests_model
346
+ st.write("Model used for Prediction is: Random Forests Model:\n Predictions are:", random_forests_model)
347
 
348
  if max_key == "SVM":
349
  svm_model = svm_model.predict(X)
350
+ X['Predict'] = random_forests_model
351
+ st.write("Model used for Prediction is: Support Vector Machines Model:", svm_model)
352
 
353
  if max_key == "KNN":
354
  knn_model = knn_model.predict(X)
355
+ X['Predict'] = random_forests_model
356
+ st.write("Model used for Prediction is: K-Nearest Neighbors Model:", knn_model)
357
 
358
  if max_key == "K- Means Clustering":
359
  kmeans_model =kmeans_model.predict(X)
360
+ X['Predict'] = random_forests_model
361
+ st.write("Model used for Prediction is: K-Means Clustering Model:", kmeans_model)
362
 
363
  st.divider()
364
 
 
366
  st.download_button(
367
  label="Download data as CSV",
368
  data=data_frame,
369
+ file_name='classifier_tagging_df.csv',
370
  mime='text/csv',
371
  )
372
 
373
  st.divider()
374
 
375
+
376
+ def regressor():
377
+ EDA, train, test = st.tabs(['Train','Test'])
378
+
379
+ with train:
380
+ st.title("Regression / Train data")
381
+ spectra = st.file_uploader("**Upload file**", type={"csv", "txt"})
382
+
383
+ if spectra is not None:
384
+ spectra_df = pd.read_csv(spectra)
385
+
386
+ st.write(spectra_df.head(5))
387
+ # st.write("Headers", spectra_df.columns.tolist())
388
+ st.write("**Total Rows**", spectra_df.shape[0])
389
+
390
+ st.divider()
391
+
392
+ option = st.text_input("**Select Output Column**:")
393
+ st.divider()
394
+
395
+ if option:
396
+ st.write("**You have selected output column**: ", option)
397
+
398
+ y = spectra_df[option]
399
+ X= spectra_df.drop(option, axis=1)
400
+
401
+ # Define the columns with your content
402
+ col1, col2 = st.columns([4,1], gap="small")
403
+
404
+ # Add content to col1
405
+ with col1:
406
+ st.write("Train data excluding output")
407
+ st.write(X.head(5))
408
+
409
+ # Add content to col2
410
+ with col2:
411
+ st.write("Output")
412
+ st.write(y.head(5))
413
+
414
+ st.divider()
415
+
416
+ # Select models
417
+ # models_list = [
418
+ # 'Linear Regression', 'Polynomial Regression', 'Ridge Regression',
419
+ # 'Lasso Regression', 'ElasticNet Regression', 'Logistic Regression',
420
+ # 'Decision Tree Regression', 'Random Forest Regression',
421
+ # 'Gradient Boosting Regression', 'Support Vector Regression (SVR)',
422
+ # 'XGBoost', 'LightGBM'
423
+ # ]
424
+
425
+ models_list = [
426
+ 'Linear Regression',
427
+ 'Polynomial Regression',
428
+ 'Ridge Regression',
429
+ 'Lasso Regression',
430
+ 'ElasticNet Regression',
431
+ 'Logistic Regression',
432
+ 'Decision Tree Regression',
433
+ 'Random Forest Regression',
434
+ 'Gradient Boosting Regression',
435
+ 'Support Vector Regression (SVR)',
436
+ 'XGBoost',
437
+ 'LightGBM'
438
+ ]
439
+
440
+ selected_models = st.multiselect('Select Regression Models', models_list)
441
+
442
+ if selected_models:
443
+ # Initialize RegressionModels class
444
+ models = RegressionModels()
445
+
446
+ # Add data
447
+ models.add_data(X, y)
448
+
449
+ # Split data into training and testing sets
450
+ models.split_data()
451
+
452
+ # Train and evaluate selected models
453
+ for model_name in selected_models:
454
+ st.subheader(f"Model: {model_name}")
455
+ models.fit(model_name)
456
+ y_pred = models.train(model_name)
457
+ mse, r2 = models.evaluate(model_name)
458
+ st.write(f"MSE: {mse}")
459
+ st.write(f"R-squared: {r2}")
460
+
461
+
462
+ def NLP():
463
+ Gemini_Chat,Gemini_Vision,Gemini_PDF, Bert, = st.tabs(['Gemini-Chat','Gemini-Vision',"Gemini-PDF Chat",'ChatBot'])
464
+
465
+ with Gemini_Chat:
466
+ st.title("Chat with Gemini Pro")
467
+ st.write("Note: ask basic question from LLMs")
468
+ gemini_model()
469
+
470
+ with Gemini_Vision:
471
+
472
+ st.header("Chat with Image using Gemini ")
473
+ st.write("Note: upload single image and ask question related to Image, and Input the relative prompt to ask question:")
474
+ input=st.text_input("Input Prompt: ",key="input_prompt")
475
+ uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
476
+ image=""
477
+
478
+ if uploaded_file is not None:
479
+ image = Image.open(uploaded_file)
480
+ #image = Image.open(io.BytesIO(uploaded_file.read()))
481
+
482
+ st.image(image, caption="Uploaded Image.", use_column_width=True)
483
+
484
+ submit=st.button("Tell me about the image")
485
+ ## If ask button is clicked
486
+ if submit:
487
+ response=get_gemini_response_vision(input,image)
488
+ st.subheader("The Response is")
489
+ st.write(response)
490
+
491
+ with Gemini_PDF:
492
+ st.title(" Working on the model, will add soon.")
493
+
494
+ with Bert:
495
+ st.title(" Working on the model, will add soon.")
496
+
497
+
498
+ def deep_learning():
499
+ st.title("Deep Learning Models")
500
+ st.write("Needs to add projects of deep learning")
501
+
502
+
503
+ def resume():
504
+ st.title("Resume")
505
+ st.write("")
506
+ About, Work_Experience,Skills_Tools, Education_Certification = st.tabs(["About", "Work Experience","Skills & Tools", "Education & Certificates"])
507
+
508
+ with About:
509
+ Resume().display_information()
510
+
511
+ with Work_Experience:
512
+ Resume().display_work_experience()
513
+
514
+ with Skills_Tools:
515
+ Resume().skills_tools()
516
+
517
+ with Education_Certification:
518
+ Resume().display_education_certificate()
519
+
520
+
521
+
522
+ # Main function to run the app
523
+ def main():
524
+
525
+ st.sidebar.title("Deep Learning/ Data Science/ AI Models")
526
+ # page_options = ["Classification", "Regressor", "NLP", "Image", "Voice", "Video", "LLMs"]
527
+ page_options = ["Chatbot & NLP" ,"Classification", "Regressor","Deep Learning", "Resume"]
528
+ choice = st.sidebar.radio("Select", page_options)
529
+
530
+ if choice == "Classification":
531
+ classification()
532
+
533
  elif choice == "Regressor":
534
  regressor()
535
+ elif choice == "Chatbot & NLP":
536
  NLP()
537
 
538
+ if choice == "Deep Learning":
539
+ deep_learning()
 
 
 
540
 
 
 
 
 
 
541
  if choice == 'Resume':
542
  resume()
543
 
classification.py CHANGED
@@ -1,3 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from sklearn.model_selection import train_test_split, GridSearchCV
2
  from sklearn.naive_bayes import GaussianNB
3
  from sklearn.linear_model import LogisticRegression
@@ -81,3 +203,5 @@ class ClassificationModels:
81
  def predict_output(self, model):
82
  y_pred = model.predict(self.X_test)
83
  return y_pred
 
 
 
1
+ from sklearn.pipeline import Pipeline
2
+ from sklearn.compose import ColumnTransformer
3
+ from sklearn.preprocessing import OneHotEncoder, StandardScaler
4
+ from sklearn.impute import SimpleImputer
5
+ from sklearn.model_selection import train_test_split, GridSearchCV
6
+ from sklearn.naive_bayes import GaussianNB
7
+ from sklearn.linear_model import LogisticRegression
8
+ from sklearn.tree import DecisionTreeClassifier
9
+ from sklearn.ensemble import RandomForestClassifier
10
+ from sklearn.svm import SVC
11
+ from sklearn.neighbors import KNeighborsClassifier
12
+ from sklearn.cluster import KMeans
13
+ from sklearn.metrics import accuracy_score, classification_report
14
+
15
+ class ClassificationModels:
16
+ def __init__(self, X, y=None, hyperparameters=None):
17
+ self.X = X
18
+ self.y = y
19
+ self.hyperparameters = hyperparameters
20
+
21
+ def split_data(self, test_size=0.2, random_state=42):
22
+ self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
23
+ self.X, self.y, test_size=test_size, random_state=random_state
24
+ )
25
+
26
+ def build_preprocessor(self):
27
+ # Separate numerical and categorical columns
28
+ numeric_features = self.X.select_dtypes(include=['int64', 'float64']).columns
29
+ categorical_features = self.X.select_dtypes(include=['object']).columns
30
+
31
+ # Define transformers for numerical and categorical data
32
+ numeric_transformer = Pipeline(steps=[
33
+ ('imputer', SimpleImputer(strategy='mean')),
34
+ ('scaler', StandardScaler())
35
+ ])
36
+ categorical_transformer = Pipeline(steps=[
37
+ ('imputer', SimpleImputer(strategy='most_frequent')),
38
+ ('onehot', OneHotEncoder(handle_unknown='ignore'))
39
+ ])
40
+
41
+ # Combine transformers using ColumnTransformer
42
+ preprocessor = ColumnTransformer(
43
+ transformers=[
44
+ ('num', numeric_transformer, numeric_features),
45
+ ('cat', categorical_transformer, categorical_features)
46
+ ])
47
+ return preprocessor
48
+
49
+ def build_model_pipeline(self, classifier):
50
+ # Build preprocessor
51
+ preprocessor = self.build_preprocessor()
52
+
53
+ # Combine preprocessor with classifier in a pipeline
54
+ model_pipeline = Pipeline(steps=[
55
+ ('preprocessor', preprocessor),
56
+ ('classifier', classifier)
57
+ ])
58
+ return model_pipeline
59
+
60
+
61
+ def evaluate_model(self, model):
62
+ model.fit(self.X_train, self.y_train)
63
+ accuracy = model.score(self.X_test, self.y_test)
64
+ return accuracy
65
+
66
+ def evaluate_classification_report(self, model):
67
+ y_pred = model.predict(self.X_test)
68
+ return classification_report(self.y_test, y_pred, output_dict=True)
69
+
70
+ def naive_bayes_classifier(self,params = None):
71
+ model = GaussianNB()
72
+ return self.build_model_pipeline(model)
73
+
74
+ def logistic_regression(self, params=None):
75
+ model = LogisticRegression()
76
+ if self.hyperparameters and 'logistic_regression' in self.hyperparameters:
77
+ model = GridSearchCV(model, params, cv=5)
78
+ return self.build_model_pipeline(model)
79
+
80
+ def decision_tree(self, params=None):
81
+ model = DecisionTreeClassifier()
82
+ if self.hyperparameters and 'decision_tree' in self.hyperparameters:
83
+ model = GridSearchCV(model, params=self.hyperparameters['decision_tree'], cv=5)
84
+ return self.build_model_pipeline(model)
85
+
86
+ def random_forests(self, params=None):
87
+ model = RandomForestClassifier()
88
+ if self.hyperparameters and 'random_forests' in self.hyperparameters:
89
+ model = GridSearchCV(model, params=self.hyperparameters['random_forests'], cv=5)
90
+ return self.build_model_pipeline(model)
91
+
92
+ def support_vector_machines(self, params=None):
93
+ model = SVC()
94
+ if self.hyperparameters and 'support_vector_machines' in self.hyperparameters:
95
+ model = GridSearchCV(model, params=self.hyperparameters['support_vector_machines'], cv=5)
96
+ return self.build_model_pipeline(model)
97
+
98
+ def k_nearest_neighbour(self, params=None):
99
+ model = KNeighborsClassifier()
100
+ if self.hyperparameters and 'k_nearest_neighbour' in self.hyperparameters:
101
+ model = GridSearchCV(model, params=self.hyperparameters['k_nearest_neighbour'], cv=5)
102
+ return self.build_model_pipeline(model)
103
+
104
+ def k_means_clustering(self, n_clusters):
105
+ model = KMeans(n_clusters=n_clusters)
106
+ return model
107
+
108
+ def evaluate_model(self, model):
109
+ model.fit(self.X_train, self.y_train)
110
+ accuracy = model.score(self.X_test, self.y_test)
111
+ return accuracy
112
+
113
+ def evaluate_classification_report(self, model):
114
+ y_pred = model.predict(self.X_test)
115
+ return classification_report(self.y_test, y_pred, output_dict=True)
116
+
117
+ def predict_output(self, model):
118
+ return model.predict(self.X_test)
119
+
120
+
121
+
122
+ """
123
  from sklearn.model_selection import train_test_split, GridSearchCV
124
  from sklearn.naive_bayes import GaussianNB
125
  from sklearn.linear_model import LogisticRegression
 
203
  def predict_output(self, model):
204
  y_pred = model.predict(self.X_test)
205
  return y_pred
206
+
207
+ """
faiss_index/index.faiss ADDED
Binary file (286 kB). View file
 
faiss_index/index.pkl ADDED
Binary file (933 kB). View file
 
requirements.txt CHANGED
@@ -5,4 +5,9 @@ streamlit==1.32.0
5
  transformers==4.39.2
6
  xgboost==2.0.3
7
  google.generativeai
8
- python-dotenv
 
 
 
 
 
 
5
  transformers==4.39.2
6
  xgboost==2.0.3
7
  google.generativeai
8
+ python-dotenv
9
+ langchain
10
+ PyPDF2
11
+ chromadb
12
+ faiss-cpu
13
+ langchain_google_genai