Spaces:
Sleeping
Sleeping
Upload 6 files
Browse files- .gitattributes +1 -0
- XGB_best_model.pkl +3 -0
- app.py +35 -0
- eda.py +183 -0
- fraud_test.csv +3 -0
- prediction.py +153 -0
- requirements.txt +6 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
fraud_test.csv filter=lfs diff=lfs merge=lfs -text
|
XGB_best_model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2d0f429f5bcb9ed9f8da34547feee4b3f0e491048606dce13a3caa7c1243613e
|
3 |
+
size 89072
|
app.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import pickle
|
5 |
+
|
6 |
+
from eda import eda_page
|
7 |
+
from prediction import model_page
|
8 |
+
|
9 |
+
# Load the dataset once per script run; it is displayed in full further down.
data = pd.read_csv("fraud_test.csv")

# Page header and short description shown above the sidebar-selected page.
st.header('Milestone 2')
st.write("""
Created by Reski Hidayat - HCK015 """)

st.write("This program is made to predict Credit Card Fraud using Model Classification.")
st.write("Dataset `fraud_test`")
# Render the frame explicitly instead of relying on a bare `data` expression,
# which only shows up when Streamlit's "magic" feature is enabled.
st.write(data)
|
19 |
+
|
20 |
+
def main():
    """Show the sidebar menu and render whichever page the user picked."""
    # Menu label -> page renderer; dict order is the order shown in the sidebar.
    pages = {
        "Data Analysis": eda_page,
        "Model Prediction": model_page,
    }

    choice = st.sidebar.radio("Menu", list(pages))

    # Look the renderer up rather than branching; an unknown label renders nothing.
    render = pages.get(choice)
    if render is not None:
        render()


if __name__ == "__main__":
    main()
|
eda.py
ADDED
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import matplotlib.pyplot as plt
|
3 |
+
import numpy as np
|
4 |
+
import pandas as pd
|
5 |
+
import seaborn as sns
|
6 |
+
|
7 |
+
# Load data from a CSV file
|
8 |
+
# Module-level frame read at import time and used by eda_page(). Note that
# eda_page() converts `dob` to datetime and adds `age` / `age_group` columns
# to this same object in place.
data = pd.read_csv('fraud_test.csv')
|
9 |
+
|
10 |
+
# def annotate_bar(ax, custom_y_func, font_size=14):
|
11 |
+
|
12 |
+
# for p in ax.patches:
|
13 |
+
# # Calculate annotation
|
14 |
+
# value = str(round(p.get_height(), 1))
|
15 |
+
# x = (p.get_x() + p.get_width() / 2) * 0.99
|
16 |
+
# y = ((p.get_y() + p.get_height() / 2) * 0.99)
|
17 |
+
|
18 |
+
# y = custom_y_func(y)
|
19 |
+
# ax.annotate(
|
20 |
+
# value,
|
21 |
+
# (x, y),
|
22 |
+
# color="black",
|
23 |
+
# size=font_size, ha='center', va='center'
|
24 |
+
# )
|
25 |
+
|
26 |
+
# Display names for the two values of `is_fraud`: position 0 -> 0, position 1 -> 1.
_FRAUD_LABELS = ('Not Fraud', 'Fraud')

# (column, display name) pairs used by both "top 10" sections.
_TOP10_COLUMNS = (
    ('job', 'Job'),
    ('state', 'State'),
    ('city', 'City'),
    ('merchant', 'Merchant'),
)

# NOTE(review): assumes the `gender` column is coded 'F' / 'M' -- confirm
# against the CSV. Unknown codes fall back to the raw value, so a pie slice is
# never given the wrong label.
_GENDER_NAMES = {'F': 'Female', 'M': 'Male'}

_TOP10_AMOUNT_NOTES = """
* From the top 10 transaction amount by job we can see `Therapist` have the most fraud with almost 4000 transaction amount meanwhile `Film/Video editor` are the most non fraud with 30.000 transaction
* From the top 10 transaction amount by state we can see `NY` have the most fraud with 10.000 transaction amount meanwhile `TX`are the most non fraud with above 250.000 transaction
* From the top 10 transaction amount by city we can see `Camden` have the most fraud with 3500 transaction amount meanwhile `Naples` are the most non fraud with 250.000 transaction
* From the top 10 transaction amount by merchant we can see `Commier` have most fraud with 3000 transaction amount meanwhile `Corwin-Romaguera` are the most non fraud with almost 250.000 transaction
"""

_TOP10_COUNT_NOTES = """
* From the top 10 transaction by job we can see `Color Technologist` have the most fraud with above 20 transaction meanwhile `Film/Video editor` are the most non fraud with 2.000 transaction
* From the top 10 transaction by state we can see `NY` have the most fraud with above 80 transaction meanwhile `TX`are the most non fraud with 20.000 transaction
* From the top 10 transaction by city we can see `Camden` have the most fraud above 20 transaction meanwhile `Birmingham` are the most non fraud with almost 1.200 transaction
* From the top 10 transaction by merchant we can see `Healthcore LLC.` have most fraud with 10 transaction meanwhile `Killback LLC.` are the most non fraud with almost 1.000 transaction
"""

_TOTALS_NOTES = """
Based on visualisation above:
* There is 276743 total number of transaction `not fraud` and 1117 `fraud` transaction
* There is 18745296.5 total transaction amount of `not fraud` and 1117 `fraud` transaction
"""

_DISTRIBUTION_NOTES = """
Based on visualisation above we can see:
- There is 54,8% transaction of `female` and 45,2% transaction of `male` in `not fraud` and `fraud`
- Most distribution of `not fraud` transaction by category is from `gas_transport` meanwhile in fraud is from shopping_net
- In distribution transaction by age mostly between 30-40 in `fraud` and between 45-50 for `not fraud`
- By age group mostly `not fraud` transaction is from Adult with 73,9% and `fraud` also from Adult with 74,6%
"""


def _show(fig):
    """Render *fig* in the app, then close it.

    Figures created through ``plt.subplots`` stay registered with pyplot until
    closed; without the explicit close they pile up on every Streamlit rerun.
    """
    st.pyplot(fig)
    plt.close(fig)


def _annotate_bar(ax, custom_y_func, font_size=14):
    """Write each bar's height (rounded to 1 decimal) near the bar's centre.

    ``custom_y_func`` receives the computed y position and returns the one to
    use, letting callers lift labels of very short bars.
    """
    for patch in ax.patches:
        text = str(round(patch.get_height(), 1))
        x = (patch.get_x() + patch.get_width() / 2) * 0.99
        y = custom_y_func((patch.get_y() + patch.get_height() / 2) * 0.99)
        ax.annotate(text, (x, y), color="black", size=font_size, ha='center', va='center')


def _plot_top10(col, axis_label, y_label, aggregate):
    """Draw side-by-side top-10 bar charts (not fraud / fraud) grouped by *col*.

    ``aggregate`` turns the ``groupby(col)`` object into a Series; its ten
    largest entries are plotted.
    """
    fig, axes = plt.subplots(1, 2, figsize=(30, 5))
    for flag, (ax, title) in enumerate(zip(axes, _FRAUD_LABELS)):
        subset = data[data['is_fraud'] == flag]
        top = aggregate(subset.groupby(col)).nlargest(10)
        ax.bar(top.index, top.values, color='#a1c9f4')
        ax.set_title(title)
        ax.set_xlabel(axis_label)
        ax.set_ylabel(y_label)
        # State codes are short enough to stay horizontal; other labels are rotated.
        ax.tick_params(axis='x', rotation=0 if col == 'state' else 90)
    _show(fig)


def _apply_age_group(age):
    """Map an age in years to 'Teenager', 'Young Adult', 'Adult' or 'Elder'."""
    if age <= 18:
        return 'Teenager'
    if age <= 25:
        return "Young Adult"
    if age <= 64:
        return "Adult"
    return "Elder"


def eda_page():
    """Render the exploratory-data-analysis page.

    Sections, in order: top-10 charts by summed amount, top-10 charts by
    transaction count, total count / total amount per fraud class, and
    per-class distributions by gender, category, age and age group. Each
    section is followed by a fixed explanatory text.

    Side effects: converts ``data['dob']`` to datetime and adds ``age`` and
    ``age_group`` columns to the module-level ``data`` frame; sets the seaborn
    palette to "pastel". Every figure is closed after it is rendered.
    """
    st.title("Eksploratory Data Analysis")
    st.write('Analyze the DataFrame for Better Understanding')

    # ---- Top 10 by summed transaction amount ------------------------------
    st.markdown("<h2><b>Top 10 Transaction Amount</b></h2>", unsafe_allow_html=True)
    for col, _ in _TOP10_COLUMNS:
        st.subheader(f"Top 10 transaction amount by {col}")
        _plot_top10(col, col, 'Amount', lambda grouped: grouped['amt'].sum())
    st.write("**Explanation**:")
    st.markdown(_TOP10_AMOUNT_NOTES)

    # ---- Top 10 by number of transactions ---------------------------------
    st.markdown("<h2><b>Top 10 Transaction Count</b></h2>", unsafe_allow_html=True)
    # Also determines the colours of the seaborn count/hist plots further down.
    sns.set_palette("pastel")
    for col, display in _TOP10_COLUMNS:
        st.subheader(f"Top 10 transaction by {display}")
        _plot_top10(col, display, 'Count', lambda grouped: grouped.size())

    st.write("")  # Add a blank line

    st.write("**Explanation**:")
    st.markdown(_TOP10_COUNT_NOTES)

    # ---- Totals per fraud class -------------------------------------------
    st.markdown("<h2><b>Total Number and Amount for Fraud and Non Fraud Transaction</b></h2>", unsafe_allow_html=True)

    st.header("Fraud and Not Fraud Transactions Count")
    # Name both columns explicitly: what a bare value_counts().reset_index()
    # calls them depends on the installed pandas version.
    fraud_counts = (
        data['is_fraud']
        .apply(lambda flag: "Fraud" if flag == 1 else 'Not Fraud')
        .value_counts()
        .rename_axis('is_fraud')
        .reset_index(name='count')
    )
    fig, ax = plt.subplots(figsize=(15, 5))
    sns.barplot(data=fraud_counts, x='is_fraud', y='count', color='#c6def8', ax=ax)
    # Labels that would land below 10000 are lifted to 15000.
    _annotate_bar(ax, lambda y: 15000 if y < 10000 else y, font_size=14)
    ax.set_title("Total number of transaction for fraud and not fraud transaction", fontsize=12, fontweight='bold')
    ax.set_ylabel("Transaction count")
    ax.tick_params(axis='x', rotation=0)
    _show(fig)

    st.header("Fraud and Not Fraud Transactions Amount")
    fraud_amounts = data.groupby('is_fraud')['amt'].sum().reset_index()
    fig, ax = plt.subplots(figsize=(15, 5))
    sns.barplot(data=fraud_amounts, x='is_fraud', y='amt', color='#c6def8', ax=ax)
    # Labels that would land below 1200000 are lifted to 1900000.
    _annotate_bar(ax, lambda y: 1900000 if y < 1200000 else y, font_size=12)
    ax.set_title("Total transaction amount for fraud and not fraud transaction", fontsize=12, fontweight='bold')
    ax.set_ylabel("Transaction amount")
    ax.set_xticklabels(list(_FRAUD_LABELS), rotation=0)
    _show(fig)

    st.write("**Explanation**:")
    st.markdown(_TOTALS_NOTES)

    # ---- Derived age columns ----------------------------------------------
    # Age is measured against the fixed year 2020, not the current date.
    data['dob'] = pd.to_datetime(data['dob'])
    data['age'] = 2020 - data['dob'].dt.year
    data['age_group'] = data['age'].apply(_apply_age_group)

    # ---- Distributions per fraud class ------------------------------------
    st.header("Overview of dataset by Age, gender, and category")
    distribution_columns = (
        ('gender', 'gender'),
        ('category', 'category'),
        ('age', 'age'),
        ('age_group', 'age group'),
    )

    for col, display in distribution_columns:
        st.subheader("Distribution of transaction by " + display)
        fig, axes = plt.subplots(1, 2, figsize=(15, 5))
        for flag, ax in enumerate(axes):
            subset = data[data['is_fraud'] == flag]
            if col == 'gender':
                counts = subset[col].value_counts()
                # Derive labels from the index: value_counts() orders by
                # frequency, so a fixed ['Female', 'Male'] list can be swapped.
                labels = [_GENDER_NAMES.get(code, code) for code in counts.index]
                ax.pie(counts, labels=labels, autopct='%1.1f%%')
            elif col == 'age_group':
                counts = subset[col].value_counts()
                ax.pie(counts, labels=counts.index, autopct='%1.1f%%')
            elif col == 'category':
                sns.countplot(data=subset, y=col, order=subset[col].value_counts().index, ax=ax)
            else:
                sns.histplot(data=subset, x=col, ax=ax)
            ax.set_title(_FRAUD_LABELS[flag])
            ax.set_xlabel(display)
            if col == 'category':
                # Rotate via tick_params so the tick texts are not frozen
                # before the figure has been drawn.
                ax.tick_params(axis='x', rotation=90)
        _show(fig)

    st.write("**Explanation**:")
    st.markdown(_DISTRIBUTION_NOTES)
|
fraud_test.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86a070405882d0414853dc0d2879451ded709d7327f6630ed4b39b5167ca815a
|
3 |
+
size 143639688
|
prediction.py
ADDED
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import pickle
|
5 |
+
|
6 |
+
# Library Random Data
|
7 |
+
from scipy.stats import randint
|
8 |
+
|
9 |
+
from datetime import datetime, timedelta
|
10 |
+
from sklearn.utils import shuffle
|
11 |
+
|
12 |
+
def model_page():
    """Render the prediction page.

    Collects one row of inputs from the sidebar via ``user_input()``, echoes
    it, loads the pickled model from ``XGB_best_model.pkl``, and writes whether
    the model classifies the row as fraud (label 1) or legit (anything else).
    """
    st.title("Model Prediction of Credit Card Fault")
    st.write("The model predicts whether the customer's transaction is fraud or not")
    st.sidebar.header('User Input Features')

    input_data = user_input()

    st.subheader('User Input')
    st.write(input_data)

    # NOTE(review): unpickling runs arbitrary code; only safe because the file
    # ships with the app. The model is re-read from disk on every call.
    with open("XGB_best_model.pkl", "rb") as f:
        load_model = pickle.load(f)

    prediction = load_model.predict(input_data)

    # Compare the first predicted label explicitly instead of taking the truth
    # value of a whole array, which is only well-defined for one element.
    first_label = np.asarray(prediction).ravel()[0]
    if first_label == 1:
        verdict = 'The Transaction is Fraud'
    else:
        verdict = 'The Transaction is Legit'

    st.write('Based on user input, the model predicted: ')
    st.write(verdict)
|
35 |
+
|
36 |
+
def user_input(num_rows=1):
    """Return the sidebar inputs as a DataFrame of `num_rows` rows (default 1).

    Thin wrapper that forwards to ``generate_data``.
    """
    return generate_data(num_rows)
|
39 |
+
|
40 |
+
def generate_data(num_rows=555719):
    """Build a DataFrame of `num_rows` rows from the sidebar widgets.

    Each widget value is repeated `num_rows` times, so all rows are identical
    except ``Trans_num``, which counts 1..num_rows. Several values appear under
    two column names (e.g. ``Category`` / ``category``, ``Amt`` / ``amt``).

    Creating the widgets is a side effect: they are added to the Streamlit
    sidebar in the order written below.
    """
    sidebar = st.sidebar

    def fill(value):
        # Broadcast a single widget value to a full column.
        return [value] * num_rows

    trans_date_trans_time = fill(sidebar.date_input(
        "Transaction Date",
        value=datetime.now(),
        min_value=datetime.now() - timedelta(days=365),
        max_value=datetime.now(),
    ))
    cc_num = fill(sidebar.number_input("Credit Card Number", value=500000, min_value=100000, max_value=999999))
    merchant = fill(sidebar.selectbox("Merchant", ['Merchant1', 'Merchant2', 'Merchant3']))
    category = fill(sidebar.selectbox("Category", ['Personal', 'Childcare', 'Food', 'Transportation']))
    amt = fill(sidebar.number_input("Amount", value=500, min_value=0, max_value=1000))
    first = fill(sidebar.text_input("First Name"))
    last = fill(sidebar.text_input("Last Name"))
    gender = fill(sidebar.selectbox("Gender", ['Male', 'Female']))
    street = fill(sidebar.text_input("Street"))
    city = fill(sidebar.text_input("City"))
    state = fill(sidebar.selectbox("State", ['NY', 'CA', 'IL', 'TX']))
    zip_code = fill(sidebar.text_input("Zip Code"))
    lat = fill(sidebar.number_input("Latitude", value=40.7128, min_value=-90., max_value=90.))
    long_ = fill(sidebar.number_input("Longitude", value=-74.0060, min_value=-180., max_value=180.))
    city_pop = fill(sidebar.number_input("City Population", value=10000, min_value=10000, max_value=1000000))
    job = fill(sidebar.selectbox("Job", ['Software Engineer', 'Doctor', 'Lawyer', 'Teacher']))
    dob = fill(sidebar.date_input(
        "Date of Birth",
        value=datetime.now() - timedelta(days=365*70),
        min_value=datetime.now() - timedelta(days=365*100),
        max_value=datetime.now(),
    ))

    # The only per-row value: a running transaction number starting at 1.
    trans_num = np.arange(1, num_rows + 1)

    unix_time = fill(sidebar.number_input(
        "Unix Time",
        value=int(datetime.now().timestamp()),
        min_value=0,
        max_value=int(datetime.now().timestamp()),
    ))
    merch_lat = fill(sidebar.number_input("Merchant Latitude", value=40.7128, min_value=-90., max_value=90.))
    merch_long = fill(sidebar.number_input("Merchant Longitude", value=-74.0060, min_value=-180., max_value=180.))
    age = fill(sidebar.number_input("Age", value=30, min_value=18, max_value=80))

    # Key order fixes the column order of the resulting frame.
    return pd.DataFrame({
        'Trans_date_trans_time': trans_date_trans_time,
        'Cc_num': cc_num,
        'Merchant': merchant,
        'Category': category,
        'Amt': amt,
        'First': first,
        'Last': last,
        'Gender': gender,
        'Street': street,
        'City': city,
        'State': state,
        'Zip': zip_code,
        'Lat': lat,
        'Long': long_,
        'City_pop': city_pop,
        'Job': job,
        'Dob': dob,
        'Trans_num': trans_num,
        'Unix_time': unix_time,
        'Merch_lat': merch_lat,
        'Merch_long': merch_long,
        'age': age,
        'category': category,
        'amt': amt,
        'state': state,
        'job': job,
    })
|
141 |
+
|
142 |
+
# def main():
|
143 |
+
# st.title("Credit Card Transaction Data")
|
144 |
+
# st.write("This app generates random credit card transaction data.")
|
145 |
+
|
146 |
+
# num_rows = st.slider("Number of rows", 100, 100000, 555719)
|
147 |
+
|
148 |
+
# df = generate_data(num_rows)
|
149 |
+
|
150 |
+
# st.write(df)
|
151 |
+
|
152 |
+
# if __name__ == "__main__":
|
153 |
+
# main()
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
scikit-learn
|
2 |
+
pandas
|
3 |
+
matplotlib
|
4 |
+
pickle
|
5 |
+
transformers
|
6 |
+
seaborn
|