import streamlit as st
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
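
# process() expects `data` to be a two-element list [train_df, test_df] of
# pandas DataFrames, with the last column of the training frame holding the
# class labels.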
def process(data):
    # Either the training or the testing dataset is still missing.
    if data[0] is None or data[1] is None:
        st.info('Please upload data.')
        return None
    # The SVM pipeline below only handles numeric features.
    if 'object' in list(data[0].dtypes) or 'object' in list(data[1].dtypes):
        st.info('Please upload numeric data.')
        return None
    # Split the training frame into features and labels (label = last column).
    x_train = data[0].iloc[:, :-1]
    y_train = data[0].iloc[:, -1]
    # Keep only as many test columns as there are training features; .copy()
    # avoids a SettingWithCopyWarning when the predictions are appended below.
    x_test = data[1].iloc[:, :x_train.shape[1]].copy()
    if len(x_train.columns) != len(x_test.columns):
        st.info('Training and testing datasets have different numbers of columns; cannot perform classification.')
        return None
    # Standardize the features, then fit an RBF-kernel SVM on the training data.
    clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)
    # Append the predictions under the training label's column name.
    x_test[data[0].columns[-1]] = pred
    return x_test
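
# --- Usage sketch (illustrative assumption, not part of the original app) ---
# One possible way to feed process() from two CSV uploads on the same
# Streamlit page; the uploader labels and pd.read_csv usage are assumptions.
import pandas as pd

train_file = st.file_uploader('Training data (CSV, last column = label)', type='csv')
test_file = st.file_uploader('Testing data (CSV)', type='csv')
data = [pd.read_csv(f) if f is not None else None for f in (train_file, test_file)]

result = process(data)
if result is not None:
    st.dataframe(result)  # show the test set with its predicted labels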