Jhoeel Luna committed on
Commit
bb979cd
0 Parent(s):

Duplicate from Jhoeel/rfmAutoV2

Browse files
Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +14 -0
  3. app.py +72 -0
  4. requirements.txt +2 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: RfmAuto
3
+ emoji: 💩
4
+ colorFrom: indigo
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 3.19.1
8
+ app_file: app.py
9
+ pinned: false
10
+ license: openrail
11
+ duplicated_from: Jhoeel/rfmAutoV2
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ import datetime
5
+ from sklearn.preprocessing import StandardScaler
6
+ from sklearn.cluster import KMeans
7
+
8
def calculate_rfm(df):
    """Build a per-customer RFM table and assign each customer to a cluster.

    Args:
        df: Purchase records, one row per purchase, with the columns
            'Email' (customer key), 'Fecha compra' (purchase date formatted
            as %m/%d/%Y) and 'Valor compra' (purchase amount). The frame is
            not modified.

    Returns:
        A DataFrame with one row per customer, ordered by 'Email' descending,
        with the columns 'Email', 'recencia' (days since the most recent
        purchase), 'frecuencia' (number of purchases), 'monetario' (total
        spent, rounded to 2 decimals) and 'cluster' (KMeans label).
        Customers whose standardized value exceeds 3 in absolute value on
        any of the three features are dropped before clustering. An empty
        frame with the same columns is returned when there is nothing to
        cluster.
    """
    result_columns = ['Email', 'recencia', 'frecuencia', 'monetario', 'cluster']
    feature_columns = ['recencia', 'frecuencia', 'monetario']

    if df.empty:
        return pd.DataFrame(columns=result_columns)

    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()

    # Recency in whole days between today and each purchase. `.dt.days` is
    # used instead of astype('timedelta64[D]'), which pandas >= 2.0 rejects.
    df['Fecha compra'] = pd.to_datetime(df['Fecha compra'], format='%m/%d/%Y')
    today = pd.Timestamp(datetime.datetime.now().date())
    df['recencia'] = (today - df['Fecha compra']).dt.days

    # One row per customer: the smallest recency belongs to the latest
    # purchase, frequency is the purchase count, monetary is the total spent.
    grouped = df.groupby('Email')
    rfm = pd.DataFrame({
        'recencia': grouped['recencia'].min(),
        'frecuencia': grouped.size(),
        'monetario': grouped['Valor compra'].sum().round(2),
    })
    if rfm.empty:
        return pd.DataFrame(columns=result_columns)

    # Keep the row order the table has always been produced in.
    rfm = rfm.sort_index(ascending=False)

    # Standardize the features and drop customers that are outliers
    # (|z| > 3) on any of them. The z-scores are computed once, on the full
    # set of customers, and are not recomputed after the outliers are removed.
    scaled = StandardScaler().fit_transform(rfm[feature_columns])
    keep = ~(np.abs(scaled) > 3).any(axis=1)
    rfm = rfm[keep].copy()
    scaled = scaled[keep]
    if rfm.empty:
        return pd.DataFrame(columns=result_columns)

    # KMeans cannot fit more clusters than samples, so cap the cluster count
    # for very small inputs. A fixed random_state keeps the labels
    # reproducible without reseeding NumPy's global generator.
    n_clusters = min(5, len(rfm))
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
    rfm['cluster'] = kmeans.fit_predict(scaled)

    return rfm.reset_index()[result_columns]
62
+
63
+
64
def read_csv(file):
    """Load the uploaded CSV and return its RFM cluster table.

    `file` is the uploaded-file object handed over by the interface; only
    its `name` attribute (the path on disk) is used.
    """
    purchases = pd.read_csv(file.name)
    rfm_table = calculate_rfm(purchases)
    return rfm_table
67
+
68
# Web UI: upload a CSV of purchases, get the clustered RFM table back.
# `gr.File` replaces the deprecated `gr.inputs.File` namespace.
iface = gr.Interface(
    fn=read_csv,
    inputs=[gr.File(label="Select a CSV file")],
    outputs="dataframe",
)

iface.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ pandas
2
+ scikit-learn