from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, classification_report


class ClassificationModels:
    def __init__(self, X, y=None, hyperparameters=None):
        self.X = X
        self.y = y
        self.hyperparameters = hyperparameters

    def split_data(self, test_size=0.2, random_state=42):
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, test_size=test_size, random_state=random_state
        )

    def build_preprocessor(self):
        # Separate numerical and categorical columns
        numeric_features = self.X.select_dtypes(include=['int64', 'float64']).columns
        categorical_features = self.X.select_dtypes(include=['object']).columns

        # Define transformers for numerical and categorical data
        numeric_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler())
        ])
        categorical_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('onehot', OneHotEncoder(handle_unknown='ignore'))
        ])

        # Combine transformers using ColumnTransformer
        preprocessor = ColumnTransformer(
            transformers=[
                ('num', numeric_transformer, numeric_features),
                ('cat', categorical_transformer, categorical_features)
            ])
        return preprocessor

    def build_model_pipeline(self, classifier):
        # Build preprocessor
        preprocessor = self.build_preprocessor()

        # Combine preprocessor with classifier in a pipeline
        model_pipeline = Pipeline(steps=[
            ('preprocessor', preprocessor),
            ('classifier', classifier)
        ])
        return model_pipeline

    def evaluate_model(self, model):
        model.fit(self.X_train, self.y_train)
        accuracy = model.score(self.X_test, self.y_test)
        return accuracy

    def evaluate_classification_report(self, model):
        y_pred = model.predict(self.X_test)
        return classification_report(self.y_test, y_pred, output_dict=True)

    def naive_bayes_classifier(self, params=None):
        model = GaussianNB()
        return self.build_model_pipeline(model)

    def logistic_regression(self, params=None):
        model = LogisticRegression()
        if self.hyperparameters and 'logistic_regression' in self.hyperparameters:
            # Grid keys refer to the bare classifier's parameters, since the
            # grid search wraps the classifier before it enters the pipeline.
            model = GridSearchCV(model, param_grid=self.hyperparameters['logistic_regression'], cv=5)
        return self.build_model_pipeline(model)

    def decision_tree(self, params=None):
        model = DecisionTreeClassifier()
        if self.hyperparameters and 'decision_tree' in self.hyperparameters:
            model = GridSearchCV(model, param_grid=self.hyperparameters['decision_tree'], cv=5)
        return self.build_model_pipeline(model)

    def random_forests(self, params=None):
        model = RandomForestClassifier()
        if self.hyperparameters and 'random_forests' in self.hyperparameters:
            model = GridSearchCV(model, param_grid=self.hyperparameters['random_forests'], cv=5)
        return self.build_model_pipeline(model)

    def support_vector_machines(self, params=None):
        model = SVC()
        if self.hyperparameters and 'support_vector_machines' in self.hyperparameters:
            model = GridSearchCV(model, param_grid=self.hyperparameters['support_vector_machines'], cv=5)
        return self.build_model_pipeline(model)

    def k_nearest_neighbour(self, params=None):
        model = KNeighborsClassifier()
        if self.hyperparameters and 'k_nearest_neighbour' in self.hyperparameters:
            model = GridSearchCV(model, param_grid=self.hyperparameters['k_nearest_neighbour'], cv=5)
        return self.build_model_pipeline(model)

    def k_means_clustering(self, n_clusters):
        model = KMeans(n_clusters=n_clusters)
        return model

    def predict_output(self, model):
        return model.predict(self.X_test)
""" | |
from sklearn.model_selection import train_test_split, GridSearchCV | |
from sklearn.naive_bayes import GaussianNB | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.tree import DecisionTreeClassifier | |
from sklearn.ensemble import RandomForestClassifier | |
from sklearn.svm import SVC | |
from sklearn.neighbors import KNeighborsClassifier | |
from sklearn.cluster import KMeans | |
from sklearn.metrics import accuracy_score | |
from sklearn.metrics import classification_report | |
class ClassificationModels: | |
def __init__(self, X, y= None,hyperparameters=None): | |
self.X = X | |
self.y = y | |
self.hyperparameters = hyperparameters | |
def split_data(self, test_size=0.2, random_state=42): | |
self.X_train, self.X_test, self.y_train, self.y_test = train_test_split( | |
self.X, self.y, test_size=test_size, random_state=random_state | |
) | |
def naive_bayes_classifier(self, param = None): | |
model = GaussianNB() | |
model.fit(self.X_train, self.y_train) | |
return model | |
def logistic_regression(self, params=None): | |
model = LogisticRegression() | |
if self.hyperparameters and 'logistic_regression' in self.hyperparameters: | |
model = GridSearchCV(model, params, cv=5) | |
model.fit(self.X_train, self.y_train) | |
return model | |
def decision_tree(self, params=None): | |
model = DecisionTreeClassifier() | |
if self.hyperparameters and 'decision_tree' in self.hyperparameters: | |
model = GridSearchCV(model, params =self.hyperparameters['decision_tree'], cv=5) | |
model.fit(self.X_train, self.y_train) | |
return model | |
def random_forests(self, params=None): | |
model = RandomForestClassifier() | |
if self.hyperparameters and 'random_forests' in self.hyperparameters: | |
model = GridSearchCV(model, params= self.hyperparameters['random_forests'], cv=5) | |
model.fit(self.X_train, self.y_train) | |
return model | |
def support_vector_machines(self, params=None): | |
model = SVC() | |
if self.hyperparameters and 'support_vector_machines' in self.hyperparameters: | |
model = GridSearchCV(model, params= self.hyperparameters['support_vector_machines'], cv=5) | |
model.fit(self.X_train, self.y_train) | |
return model | |
def k_nearest_neighbour(self, params=None): | |
model = KNeighborsClassifier() | |
if self.hyperparameters and 'k_nearest_neighbour' in self.hyperparameters: | |
st.write(self.hyperparameters['k_nearest_neighbour']) | |
model = GridSearchCV(model, params = self.hyperparameters['k_nearest_neighbour'], cv=5) | |
model.fit(self.X_train, self.y_train) | |
return model | |
def k_means_clustering(self, n_clusters): | |
model = KMeans(n_clusters=n_clusters) | |
model.fit(self.X_train) | |
return model | |
def evaluate_model(self, model): | |
y_pred = model.predict(self.X_test) | |
accuracy = accuracy_score(self.y_test, y_pred) | |
return accuracy | |
def evaluate_classification_report(self,model): | |
y_pred = model.predict(self.X_test) | |
return classification_report(self.y_test, y_pred, output_dict=True) | |
def predict_output(self, model): | |
y_pred = model.predict(self.X_test) | |
return y_pred | |
""" |