# NOTE: the lines below were page-scrape residue (Hugging Face Space status,
# file size, commit hashes, and a line-number gutter); kept here as a comment
# so the file parses as Python.
# Spaces: Sleeping | File size: 3,616 Bytes
import gradio as gr
from gradio import components
import pandas as pd
import numpy as np
from datasets import load_dataset
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.naive_bayes import MultinomialNB
# Read the CSV from a Hugging Face dataset
dataset = load_dataset("animonte/bank-state-data")
df = pd.DataFrame(dataset["train"])
# Drop identifier columns with no predictive value
df = df.drop(['Unnamed: 0', 'RowNumber', 'Surname', 'CustomerId'], axis=1)
# Encode categorical variables as numeric codes.
# NOTE: assign the result instead of `Series.replace(..., inplace=True)` on a
# column selection — that chained-inplace pattern is deprecated in pandas 2.x
# and silently ineffective under copy-on-write.
Gender = {'Female': 0, 'Male': 1}
Geography = {'Texas': 1, 'California': 2, 'Alabama': 3}
df['Gender'] = df['Gender'].replace(Gender)
df['Geography'] = df['Geography'].replace(Geography)
# Impute nulls with each column's median
df = df.apply(lambda x: x.fillna(x.median()))
# Imputaci贸n de outliers
def imputar_outliers(df, nombre_columna):
    """Cap outliers in one numeric column using the IQR (Tukey) rule.

    Values above Q3 + 1.5*IQR are replaced with the column's 97th
    percentile; values below Q1 - 1.5*IQR with its 5th percentile.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame. Unlike the previous version, it is NOT modified
        in place; callers reassign the returned Series themselves.
    nombre_columna : str
        Name of the numeric column to treat.

    Returns
    -------
    pandas.Series
        The column with outliers imputed, index preserved.
    """
    columna = df[nombre_columna]
    Q1, Q3 = np.percentile(columna, [25, 75])
    rango_intercuartil = Q3 - Q1
    limite_superior = Q3 + 1.5 * rango_intercuartil
    limite_inferior = Q1 - 1.5 * rango_intercuartil
    # Compute BOTH replacement percentiles on the original data. The old
    # code recomputed the 5th percentile after high outliers had already
    # been replaced, letting the first imputation bias the second.
    p97, p5 = np.percentile(columna, [97, 5])
    resultado = columna.where(columna <= limite_superior, p97)
    resultado = resultado.where(columna >= limite_inferior, p5)
    return resultado
# Columns whose outliers get imputed, then apply the treatment column by column
variables_outlier = ['Age', 'Tenure']
for columna in variables_outlier:
    df[columna] = imputar_outliers(df, columna)
# End of outlier imputation
# ---- Predictive models ----
# Features fed to both classifiers
pred_labels = ['CreditScore', 'Age', 'Balance', 'EstimatedSalary']
X = df[pred_labels]
y = df['Exited']
# Hold out a test split. (The previous version also built a KFold object
# and a predict_proba result on X_test that were never used anywhere;
# both removed as dead code.)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
# Decision-tree model, shallow to limit overfitting
modelo_ajusta_train = tree.DecisionTreeClassifier(max_depth=4, criterion='gini').fit(X_train, y_train)
# Multinomial Naive Bayes model, trained on the same split
modelo_naives = MultinomialNB()
modelo_naives.fit(X_train, y_train)
# Interfaz grafica
def predict(Score, Age, Balance, Salary):
    """Classify a customer with both trained models.

    Returns a pair of human-readable verdicts: (decision tree at a 0.08
    probability threshold, multinomial Naive Bayes).
    """
    features = [Score, Age, Balance, Salary]
    # Tree verdict: probability of class 1 ("exited") against a 0.08 cutoff
    prob_abandono = modelo_ajusta_train.predict_proba([features])[0][1]
    prediccion_arbol = "Abandona el banco" if prob_abandono > 0.08 else "Se queda en el banco."
    # Naive Bayes verdict: hard class prediction
    clase_naives = modelo_naives.predict([features])
    resultado_naives = "Se queda en el banco." if clase_naives == 0 else "Abandona el banco"
    return prediccion_arbol, resultado_naives
# ---- Gradio interface: one textbox per model so both verdicts show ----
output_tree = components.Textbox(label='Resultado con el modelo Tree con una sensibilidad del 0.08')
output_naives = components.Textbox(label='Resultado con el modelo Naives')
demo = gr.Interface(
    fn=predict,
    # Credit score constrained to a realistic 350-850 range; the rest free-form
    inputs=[gr.Slider(350, 850), "number", "number", "number"],
    outputs=[output_tree, output_naives],
    allow_flagging="never",
)
demo.launch()