Spaces:
Runtime error
Runtime error
File size: 4,835 Bytes
9afdcaf 30527e5 9afdcaf fd320cd 44dc288 fd320cd 9afdcaf 44dc288 9afdcaf 44dc288 9afdcaf f10abf9 fd320cd 9afdcaf fd320cd 9afdcaf fd320cd 9afdcaf 3d911a8 9afdcaf 16da787 9afdcaf 6471834 9afdcaf 6471834 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
def _is_categorical(dtype):
    """Return True for boolean, object and string dtypes (treated as categorical)."""
    types = pd.api.types
    return bool(
        types.is_bool_dtype(dtype)
        or types.is_object_dtype(dtype)
        or types.is_string_dtype(dtype)
    )


def _parse_columns(spec):
    """Turn a comma-separated string (or an iterable of names) into a list of unique, stripped names."""
    if spec is None:
        return []
    if isinstance(spec, str):
        spec = spec.split(",")
    names = []
    for raw in spec:
        name = str(raw).strip()
        if name and name not in names:
            names.append(name)
    return names


def read(file, dep, ord):
    """Load an uploaded CSV, encode its categorical columns and rank candidate models.

    Args:
        file: The uploaded file: either an object exposing its path as
            ``.name`` or a plain path string.
        dep: Name of the dependent (target) column, exactly as in the CSV header.
        ord: Ordinal column names as a comma-separated string (empty for
            none). A list or tuple of names is accepted as well.

    Returns:
        A 7-tuple ``(raw_head, transformed_head, result, text, cat, ordinal, nom)``:
        the first five rows of the raw and of the transformed data, the model
        ranking table, ``"classification"`` or ``"regression"``, and the lists
        of categorical, label-encoded and one-hot-encoded column names.

    Raises:
        ValueError: If no file was supplied or no complete row remains after
            dropping rows with missing values.
        KeyError: If ``dep`` or one of the ordinal columns is not in the file.
    """
    if file is None:
        raise ValueError("No file was uploaded")
    # The upload may arrive as a wrapper object (path in `.name`) or as the
    # path itself; accept both.
    path = getattr(file, "name", file)
    df = pd.read_csv(path)

    if dep not in df.columns:
        raise KeyError(f"Dependent variable {dep!r} is not a column of the uploaded file")

    dep_is_categorical = _is_categorical(df.dtypes[dep])
    cat = [col for col, dtype in df.dtypes.items() if _is_categorical(dtype)]

    # The textbox delivers one string such as "size, grade"; it has to be split
    # into individual column names before it can be used as a selector.
    ordinal = _parse_columns(ord)
    unknown = [col for col in ordinal if col not in df.columns]
    if unknown:
        raise KeyError(f"Ordinal variables not found in the uploaded file: {unknown}")
    # A categorical target must stay a single column, so it is always
    # label-encoded -- never one-hot encoded, which would drop `dep`.
    if dep_is_categorical and dep not in ordinal:
        ordinal.append(dep)

    # Copy so the encoded values are written to an independent frame rather
    # than to a slice of `df`.
    new_df = df.dropna(axis=0).copy()
    if new_df.empty:
        raise ValueError("No complete rows remain after dropping missing values")

    # NOTE: LabelEncoder numbers the classes in sorted order, which is not
    # necessarily the semantic order of an ordinal variable.
    for col in ordinal:
        new_df[col] = LabelEncoder().fit_transform(new_df[col])

    # Remaining categorical columns are nominal; keep the file's column order
    # so the transformed frame is deterministic.
    nom = [col for col in cat if col not in ordinal]
    if nom:
        ohe_df = pd.get_dummies(new_df[nom], drop_first=True)
        new_df = pd.concat([new_df.drop(columns=nom), ohe_df], axis=1)

    if dep_is_categorical:
        text = "classification"
        result = classification(new_df, dep)
    else:
        text = "regression"
        result = regression(new_df, dep)
    return df.head(5), new_df.head(5), result, text, cat, ordinal, nom
def classification(df, dep):
    """Rank a fixed set of classifiers on ``df`` by cross-validated accuracy.

    Args:
        df: Fully numeric data frame that contains the target column.
        dep: Name of the target column.

    Returns:
        A data frame with the columns ``params`` (as text),
        ``rank_test_score`` and ``mean_test_score`` (accuracy multiplied by
        100), sorted by rank.
    """
    features = df.drop(dep, axis=1)
    target = df[dep]
    # Only the training part of the split feeds the search; the held-out part
    # is not used afterwards.
    train_X, _held_out_X, train_y, _held_out_y = train_test_split(features, target)

    candidates = (
        LogisticRegression(),
        RandomForestClassifier(),
        DecisionTreeClassifier(),
        SVC(),
        KNeighborsClassifier(n_neighbors=5),
    )
    # One single-entry grid per candidate: each one replaces the final
    # pipeline step, so 'pass' below is only a placeholder.
    grid = [{'classification': [model]} for model in candidates]
    pipeline = Pipeline(steps=[('scale', StandardScaler()), ('classification', 'pass')])

    searcher = GridSearchCV(pipeline, param_grid=grid, n_jobs=-1, scoring='accuracy')
    searcher.fit(train_X, train_y)

    wanted = ['params', 'rank_test_score', 'mean_test_score']
    ranking = pd.DataFrame(searcher.cv_results_)[wanted]
    ranking['mean_test_score'] = ranking['mean_test_score'] * 100
    # `params` holds estimator objects; turn them into text for display.
    ranking = ranking.astype({'params': str})
    return ranking.sort_values('rank_test_score')
def regression(df, dep):
    """Rank a fixed set of regressors on ``df`` by cross-validated MAPE.

    Args:
        df: Fully numeric data frame that contains the target column.
        dep: Name of the target column.

    Returns:
        A data frame with the columns ``params`` (as text),
        ``rank_test_score`` and ``mean_test_score``, sorted by rank. The score
        is ``(1 + negative MAPE) * 100``.
    """
    features = df.drop(dep, axis=1)
    target = df[dep]
    # Only the training part of the split feeds the search; the held-out part
    # is not used afterwards.
    train_X, _held_out_X, train_y, _held_out_y = train_test_split(features, target)

    candidates = (
        LinearRegression(),
        RandomForestRegressor(),
        DecisionTreeRegressor(),
        SVR(),
    )
    # One single-entry grid per candidate: each one replaces the final
    # pipeline step, so 'pass' below is only a placeholder.
    grid = [{'regression': [model]} for model in candidates]
    pipeline = Pipeline(steps=[('scale', StandardScaler()), ('regression', 'pass')])

    searcher = GridSearchCV(
        pipeline,
        param_grid=grid,
        cv=5,
        n_jobs=-1,
        scoring='neg_mean_absolute_percentage_error',
    )
    searcher.fit(train_X, train_y)

    wanted = ['params', 'rank_test_score', 'mean_test_score']
    ranking = pd.DataFrame(searcher.cv_results_)[wanted]
    # The scorer reports the negated MAPE; shift by one and scale to a percentage.
    ranking['mean_test_score'] = (ranking['mean_test_score'] + 1) * 100
    # `params` holds estimator objects; turn them into text for display.
    ranking = ranking.astype({'params': str})
    return ranking.sort_values('rank_test_score')
# Gradio front end: collects a CSV file, the target column and optional ordinal
# columns, then shows the outputs of `read`.
# NOTE(review): the nesting of the layout blocks below is reconstructed -- the
# captured source had lost its indentation. Confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("Model Recommendation App **Upload** file to see the output.")
    with gr.Column():
        # Inputs side by side.
        with gr.Row():
            file = gr.File(label="Upload File(Comma Separated)")
            dep = gr.Textbox(label="Dependent Variable(Variable as in the file)")
            ord = gr.Textbox(label="Ordinal Variables(Seperate with a comma)")
        submit = gr.Button("Submit")
        # Textual summaries returned by `read`.
        text = gr.Text(label="Suitable Algorithm")
        other1 = gr.Text(label="Categorical Variables")
        other2 = gr.Text(label="LabelEncoded Vairables")
        other3 = gr.Text(label="OneHotEncoded Variables")
        # Raw and transformed previews side by side.
        # NOTE(review): `overflow_row_behaviour` may not be accepted by every
        # Gradio release -- confirm against the installed version.
        with gr.Row():
            org = gr.DataFrame(overflow_row_behaviour="paginate", label="Original Data")
            converted = gr.DataFrame(overflow_row_behaviour="paginate", label="Transformed Data")
        result = gr.DataFrame(label="Result")
    # The order of `outputs` matches the order of the values returned by `read`.
    submit.click(fn=read, inputs=[file, dep, ord], outputs=[org, converted, result, text, other1, other2, other3])
demo.launch()