"""Gradio demo that computes an RFM (recency, frequency, monetary) segmentation.

The uploaded CSV is expected to contain the columns 'Email', 'Fecha compra'
(purchase date formatted as %m/%d/%Y) and 'Valor compra' (purchase amount).
"""

import datetime

import gradio as gr
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Names of the three RFM feature columns produced by calculate_rfm.
RFM_COLUMNS = ['recencia', 'frecuencia', 'monetario']

# Number of customer segments produced by KMeans.
N_CLUSTERS = 5

# Z-score magnitude above which a customer is treated as an outlier.
OUTLIER_Z_THRESHOLD = 3

# NOTE(review): KMeans label ids carry no inherent meaning, so this fixed
# id -> segment-name mapping only describes the data it was tuned on. Kept
# unchanged for backward compatibility; consider deriving the names from the
# cluster centroids instead.
CLUSTER_LABELS = {
    4: 'Dormidos',
    0: 'Nuevos',
    2: 'Potenciales',
    1: 'En riesgo',
    3: 'Fieles',
}


def calculate_rfm(df):
    """Compute per-customer RFM metrics and assign each customer a segment.

    Args:
        df: Purchase-level DataFrame with the columns 'Email',
            'Fecha compra' (dates as %m/%d/%Y) and 'Valor compra'.
            The input frame is not modified.

    Returns:
        DataFrame with one row per customer and the columns 'Email',
        'recencia' (days since the latest purchase), 'frecuencia' (number
        of purchases), 'monetario' (total spent, rounded to 2 decimals)
        and 'cluster' (segment name). Customers whose standardized value
        exceeds 3 in absolute terms on any metric are excluded.

    Raises:
        ValueError: If fewer than N_CLUSTERS customers remain after
            outlier removal, or if a date does not match %m/%d/%Y.
        KeyError: If a required column is missing.
    """
    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()

    # Recency in whole days between today and each purchase. `.dt.days`
    # is used because casting to 'timedelta64[D]' is rejected by
    # pandas >= 2.0 (only s/ms/us/ns units are supported).
    df['Fecha compra'] = pd.to_datetime(df['Fecha compra'], format='%m/%d/%Y')
    today = pd.Timestamp(datetime.datetime.now().date())
    df['recencia'] = (today - df['Fecha compra']).dt.days

    # Frequency and monetary value aggregated per customer.
    grouped = df.groupby('Email')
    frequency = grouped.size().rename('frecuencia')
    monetary = grouped['Valor compra'].sum().round(2).rename('monetario')
    df = df.join(frequency, on='Email').join(monetary, on='Email')

    # Keep only the latest purchase per customer, so 'recencia' reflects
    # the most recent order.
    df = df.sort_values(by=['Email', 'Fecha compra'], ascending=False)
    df = df.drop_duplicates(subset='Email', keep='first')
    rfm = df.set_index('Email')[RFM_COLUMNS]

    # Standardize on the full customer set, then drop outliers so extreme
    # customers do not dominate the clustering.
    scaled_values = StandardScaler().fit_transform(rfm)
    keep_mask = ~(np.abs(scaled_values) > OUTLIER_Z_THRESHOLD).any(axis=1)
    rfm = rfm[keep_mask].copy()
    scaled_values = scaled_values[keep_mask]

    if len(rfm) < N_CLUSTERS:
        raise ValueError(
            f"At least {N_CLUSTERS} customers are required for clustering, "
            f"got {len(rfm)}"
        )

    # random_state=0 replaces the former np.random.seed(0) call so the
    # process-wide NumPy RNG is not reseeded as a side effect.
    kmeans = KMeans(n_clusters=N_CLUSTERS, n_init=10, random_state=0)
    cluster_ids = kmeans.fit_predict(scaled_values)
    rfm['cluster'] = [CLUSTER_LABELS[int(cluster_id)] for cluster_id in cluster_ids]

    return rfm.reset_index()[['Email', *RFM_COLUMNS, 'cluster']]


def read_csv(file):
    """Load the uploaded CSV and return the first 10 rows of its RFM table.

    Args:
        file: Upload handed over by the Gradio File component; either an
            object exposing the path as `.name` or the path itself.

    Returns:
        The first 10 rows of the DataFrame produced by calculate_rfm.
    """
    # Depending on the Gradio version/config the component passes a
    # tempfile-like object or a plain path string; accept both.
    path = getattr(file, 'name', file)
    return calculate_rfm(pd.read_csv(path)).head(10)


demo = gr.Interface(
    fn=read_csv,
    inputs=[gr.components.File(label="Select a CSV file")],
    outputs="dataframe",
    title="RFM Automatizado con Inteligencia Artificial",
)

if __name__ == "__main__":
    demo.launch()