sohail-shaikh-s07 committed on
Commit
84337b0
·
verified ·
1 Parent(s): 32cd5bb

Upload 3 files

Browse files
Files changed (3) hide show
  1. pages/fraud.py +333 -0
  2. pages/home.py +71 -0
  3. pages/project_details.py +91 -0
pages/fraud.py ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pages/fraud.py
2
+ import streamlit as st
3
+ import pandas as pd
4
+ import joblib
5
+ import plotly.express as px
6
+ import plotly.graph_objects as go
7
+ import base64
8
+
# Page title and banner image for the fraud-detection page.
HEADER_IMAGE_URL = "https://images.unsplash.com/photo-1611974789855-9c2a0a7236a3?ixlib=rb-1.2.1&auto=format&fit=crop&w=1950&q=80"

st.title("Fraud Detection")
st.image(HEADER_IMAGE_URL, use_column_width=True)
17
+
18
+
# Load pre-trained model
@st.cache_resource
def load_model():
    """Deserialize the model stored in ``model.pkl`` with joblib and return it.

    Wrapped in ``st.cache_resource`` so the file is read through the cache
    rather than on every call.
    """
    # NOTE(review): joblib.load unpickles the file; only ship a trusted model.pkl.
    with open("model.pkl", "rb") as model_file:
        return joblib.load(model_file)


model = load_model()
28
+
29
+
# Function to visualize predictions
_FRAUD_LABEL = "Fraud Transactions"
# One color per prediction label, shared by every chart below.
_LABEL_COLORS = {
    "Fraud Transactions": "red",
    "Not Fraud Transactions": "blue",
}


def visualize_predictions(data):
    """Render the prediction dashboard as five Streamlit tabs.

    Args:
        data: DataFrame of scored transactions. The charts read the columns
            "prediction_label", "prediction", "type", "amount",
            "oldbalanceOrg", "newbalanceOrig", "oldbalanceDest" and
            "newbalanceDest". "step" is optional; without it the
            "Time Patterns" tab shows a notice instead of charts.

    Returns:
        None. Everything is written to the Streamlit page. The input frame
        is left unmodified (derived balance columns live in a separate frame).
    """
    # Nothing meaningful can be plotted for an empty frame, and the fraud
    # percentage below would otherwise be 0/0.
    if data.empty:
        st.info("No transactions available to visualize.")
        return

    # Create a tab layout for different visualizations
    tab1, tab2, tab3, tab4, tab5 = st.tabs(
        [
            "Fraud Distribution",
            "Transaction Types",
            "Amount Analysis",
            "Balance Impact",
            "Time Patterns",
        ]
    )

    with tab1:
        st.subheader("Fraud vs. Non-Fraud Distribution")

        # Pie chart of fraud vs non-fraud
        fraud_counts = data["prediction_label"].value_counts().reset_index()
        fraud_counts.columns = ["Category", "Count"]

        fig_pie = px.pie(
            fraud_counts,
            values="Count",
            names="Category",
            title="Distribution of Fraud vs Non-Fraud Transactions",
            color_discrete_sequence=px.colors.sequential.RdBu,
            hole=0.3,
        )
        st.plotly_chart(fig_pie, use_container_width=True)

        # Add percentage information
        total = fraud_counts["Count"].sum()
        fraud_total = fraud_counts.loc[
            fraud_counts["Category"] == _FRAUD_LABEL, "Count"
        ].sum()
        # value_counts() drops missing labels, so total can be 0 even for a
        # non-empty frame; report 0% rather than dividing by zero.
        fraud_percent = round(fraud_total / total * 100, 2) if total else 0.0
        st.info(f"Percentage of fraudulent transactions: {fraud_percent}%")

    with tab2:
        st.subheader("Transaction Types Analysis")

        # Bar chart of transaction types with fraud distribution
        type_fraud = pd.crosstab(data["type"], data["prediction_label"])

        fig_bar = go.Figure()
        for col in type_fraud.columns:
            fig_bar.add_trace(
                go.Bar(
                    x=type_fraud.index,
                    y=type_fraud[col],
                    name=col,
                    marker_color="red" if col == _FRAUD_LABEL else "blue",
                )
            )

        fig_bar.update_layout(
            title="Fraud Distribution by Transaction Type",
            xaxis_title="Transaction Type",
            yaxis_title="Count",
            barmode="group",
        )
        st.plotly_chart(fig_bar, use_container_width=True)

        # Calculate fraud percentage by transaction type (row totals once)
        row_totals = type_fraud.sum(axis=1)
        type_fraud_pct = (
            type_fraud.div(row_totals, axis=0).mul(100).round(2).add_suffix(" %")
        )

        # The crosstab index is named "type", so it has to be renamed on the
        # axis itself; renaming a column called "index" would do nothing.
        st.dataframe(
            type_fraud_pct.rename_axis(
                index="Transaction Type", columns=None
            ).reset_index()
        )

    with tab3:
        st.subheader("Transaction Amount Analysis")

        # Histogram of transaction amounts by fraud status
        fig_hist = px.histogram(
            data,
            x="amount",
            color="prediction_label",
            marginal="box",
            nbins=50,
            opacity=0.7,
            title="Distribution of Transaction Amounts",
            color_discrete_map=_LABEL_COLORS,
        )
        fig_hist.update_layout(xaxis_title="Amount", yaxis_title="Count")
        st.plotly_chart(fig_hist, use_container_width=True)

        # Summary statistics for amounts
        st.subheader("Amount Statistics by Fraud Status")
        amount_stats = data.groupby("prediction_label")["amount"].describe()
        st.dataframe(amount_stats)

    with tab4:
        st.subheader("Balance Impact Analysis")

        # Calculate balance change in a separate frame so the caller's
        # DataFrame does not gain extra columns as a side effect.
        balance_changes = pd.DataFrame(
            {
                "prediction_label": data["prediction_label"],
                "orig_balance_change": data["newbalanceOrig"]
                - data["oldbalanceOrg"],
                "dest_balance_change": data["newbalanceDest"]
                - data["oldbalanceDest"],
            }
        )

        # Long format: one row per (transaction, account side)
        balance_df = pd.melt(
            balance_changes,
            id_vars=["prediction_label"],
            value_vars=["orig_balance_change", "dest_balance_change"],
            var_name="Account",
            value_name="Balance Change",
        )

        balance_df["Account"] = balance_df["Account"].map(
            {
                "orig_balance_change": "Origin Account",
                "dest_balance_change": "Destination Account",
            }
        )

        fig_box = px.box(
            balance_df,
            x="Account",
            y="Balance Change",
            color="prediction_label",
            title="Balance Changes in Origin vs Destination Accounts",
            color_discrete_map=_LABEL_COLORS,
        )
        st.plotly_chart(fig_box, use_container_width=True)

    with tab5:
        st.subheader("Time Patterns")

        # Time series of transactions by step (time)
        if "step" in data.columns:
            step_counts = (
                data.groupby(["step", "prediction_label"])
                .size()
                .reset_index(name="count")
            )

            fig_line = px.line(
                step_counts,
                x="step",
                y="count",
                color="prediction_label",
                title="Transaction Frequency Over Time",
                color_discrete_map=_LABEL_COLORS,
            )
            fig_line.update_layout(
                xaxis_title="Time Step", yaxis_title="Number of Transactions"
            )
            st.plotly_chart(fig_line, use_container_width=True)

            # Heatmap of fraud probability by time; a single step would
            # give a one-row heatmap, so it is skipped.
            if len(data["step"].unique()) > 1:
                pivot_data = pd.pivot_table(
                    data,
                    values="prediction",
                    index="step",
                    columns="type",
                    aggfunc="mean",
                ).fillna(0)

                fig_heatmap = px.imshow(
                    pivot_data,
                    title="Fraud Probability Heatmap by Transaction Type and Time",
                    color_continuous_scale="Reds",
                    labels=dict(
                        x="Transaction Type", y="Time Step", color="Fraud Probability"
                    ),
                )
                st.plotly_chart(fig_heatmap, use_container_width=True)
        else:
            st.write("Time step data is not available for time pattern analysis.")
219
+
220
+
# Function to add color formatting to the DataFrame
def color_fraud(val):
    """Return the CSS background rule for one prediction-label cell.

    The exact label "Fraud Transactions" is painted red; every other value
    is painted green.
    """
    if val == "Fraud Transactions":
        return "background-color: red"
    return "background-color: green"
225
+
226
+
# Function to create a download link for the CSV file
def get_csv_download_link(df):
    """Build an HTML anchor that downloads *df* as ``fraud_predictions.csv``.

    Args:
        df: DataFrame to serialize; written without the index column.

    Returns:
        An ``<a>`` tag whose ``href`` is a base64 ``data:`` URI holding the
        UTF-8 encoded CSV text.
    """
    csv_text = df.to_csv(index=False)
    # Encode explicitly as UTF-8 and declare it in the URI so non-ASCII
    # values survive the round trip.
    b64 = base64.b64encode(csv_text.encode("utf-8")).decode("ascii")
    # "text/csv" is the registered media type for CSV; "file/csv" is not one.
    return (
        f'<a href="data:text/csv;charset=utf-8;base64,{b64}" '
        'download="fraud_predictions.csv">Download CSV File</a>'
    )
233
+
234
+
# Transaction Data Input section
PREDICTION_LABELS = {1: "Fraud Transactions", 0: "Not Fraud Transactions"}
RESULT_COLUMNS = ["type", "nameOrig", "nameDest", "prediction_label"]


def _add_predictions(frame):
    """Score *frame* with the loaded model and attach the result columns.

    Adds "prediction" (raw model output) and "prediction_label" (the output
    mapped through PREDICTION_LABELS) to *frame* in place and returns it.
    """
    # Predict first, on the frame as supplied, before any column is added.
    predictions = model.predict(frame)
    frame["prediction"] = predictions
    frame["prediction_label"] = frame["prediction"].map(PREDICTION_LABELS)
    return frame


def _show_prediction_table(frame):
    """Display the key columns with the label cell colored by color_fraud."""
    styler = frame[RESULT_COLUMNS].style
    # Styler.applymap is deprecated in favor of Styler.map (pandas 2.1+);
    # use the new name when it exists and fall back on older pandas.
    apply_cellwise = getattr(styler, "map", None) or styler.applymap
    st.dataframe(apply_cellwise(color_fraud, subset=["prediction_label"]))


st.header("Transaction Data Input")
st.write("Choose to upload a CSV file or manually input transaction data.")

# Option to choose upload or manual input
option = st.radio("Select input method:", ("Upload CSV", "Manual Input"))

if option == "Upload CSV":
    # Option to upload a CSV file
    file_upload = st.file_uploader("Upload CSV", type=["csv"])
    data = None
    if file_upload is not None:
        try:
            data = pd.read_csv(file_upload)
        except pd.errors.EmptyDataError:
            # A zero-byte upload has no header row to parse.
            st.error("The uploaded file is empty.")

    if data is not None:
        st.write("Uploaded Data Preview:")
        st.write(data.head())

        if st.button("Submit CSV"):
            if data.empty:
                # Header-only CSV: there is nothing to score.
                st.warning("The uploaded CSV contains no transactions.")
            else:
                # Predict using the uploaded CSV data
                _add_predictions(data)
                st.write("Predictions:")
                _show_prediction_table(data)

                # Add a download link for the predicted CSV
                st.markdown(get_csv_download_link(data), unsafe_allow_html=True)

                # Visualizations for CSV data
                st.header("Visualization of Prediction Results")
                visualize_predictions(data)

elif option == "Manual Input":
    st.write("Manually input data:")
    # Manual input of data
    step = st.number_input("Step", min_value=0)
    # Named txn_type so the builtin ``type`` is not shadowed.
    txn_type = st.selectbox(
        "Type", ["TRANSFER", "PAYMENT", "DEBIT", "CASH_OUT", "CASH_IN"]
    )
    amount = st.number_input("Amount", min_value=0.0)
    nameOrig = st.text_input("Origin Account Name")
    oldbalanceOrg = st.number_input("Old Balance (Origin)", min_value=0.0)
    newbalanceOrig = st.number_input("New Balance (Origin)", min_value=0.0)
    nameDest = st.text_input("Destination Account Name")
    oldbalanceDest = st.number_input("Old Balance (Destination)", min_value=0.0)
    newbalanceDest = st.number_input("New Balance (Destination)", min_value=0.0)
    isFlaggedFraud = st.selectbox("Is Flagged Fraud?", [0, 1])

    if st.button("Submit"):
        # Create a single-row DataFrame from manual input
        manual_data = pd.DataFrame(
            {
                "step": [step],
                "type": [txn_type],
                "amount": [amount],
                "nameOrig": [nameOrig],
                "oldbalanceOrg": [oldbalanceOrg],
                "newbalanceOrig": [newbalanceOrig],
                "nameDest": [nameDest],
                "oldbalanceDest": [oldbalanceDest],
                "newbalanceDest": [newbalanceDest],
                "isFlaggedFraud": [isFlaggedFraud],
            }
        )
        st.write("Manual Input Data:")
        st.write(manual_data)

        # Predict using the manually input data
        _add_predictions(manual_data)
        st.write("Predictions:")
        _show_prediction_table(manual_data)

        # For manual input, we'll just show the prediction result
        st.header("Prediction Result")
        result = manual_data["prediction_label"].iloc[0]
        result_color = "red" if result == "Fraud Transactions" else "green"
        st.markdown(
            f"<h2 style='text-align: center; color: {result_color};'>{result}</h2>",
            unsafe_allow_html=True,
        )

# Footer
st.markdown("---")
st.write(
    """
© 2024 Financial Fraud Detection System. All rights reserved.
"""
)
pages/home.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pages/home.py
2
+ import streamlit as st
3
+
# Home page: static content declared as data, then rendered top to bottom.
HERO_IMAGE_URL = "https://images.unsplash.com/photo-1611974789855-9c2a0a7236a3?ixlib=rb-1.2.1&auto=format&fit=crop&w=1950&q=80"

# (subheader, body) for each of the three "Key Features" columns, left to right.
FEATURE_CARDS = (
    (
        "Real-Time Detection",
        """
Our system processes transactions in real-time, providing instant fraud detection and alerting.
This ensures that fraudulent activities are identified and mitigated as soon as they occur.
""",
    ),
    (
        "High Accuracy",
        """
Utilizing state-of-the-art machine learning algorithms, our system achieves an accuracy rate of over 95%,
minimizing false positives and ensuring reliable fraud detection.
""",
    ),
    (
        "User-Friendly Interface",
        """
The system features an intuitive web interface built with Streamlit, allowing users to easily upload transaction data,
view fraud predictions, and analyze results with detailed visualizations.
""",
    ),
)

# Page title and banner
st.title("Welcome to the Financial Fraud Detection System")
st.image(HERO_IMAGE_URL, use_column_width=True)

# Introduction section
st.header("Introduction")
st.write(
    """
In the digital age, financial fraud has become a significant concern for individuals, businesses, and financial institutions.
With the increasing volume of online transactions, the need for robust fraud detection systems has never been more critical.
Our **Financial Fraud Detection System** leverages advanced machine learning techniques to identify and prevent fraudulent activities in real-time.
"""
)

# Key features section: one card per column
st.header("Key Features")
for column, (card_title, card_body) in zip(st.columns(3), FEATURE_CARDS):
    with column:
        st.subheader(card_title)
        st.write(card_body)

# How it works section
st.header("How It Works")
st.write(
    """
Our Financial Fraud Detection System is built on the **XGBoost** algorithm, a powerful machine learning model known for its efficiency and accuracy in handling tabular data.
The system processes both historical and real-time transaction data, identifying patterns and anomalies that indicate fraudulent behavior.
"""
)

# Steps in the process
st.subheader("Process Overview")
st.write(
    """
1. **Data Collection**: Transaction data is collected from various sources, including banks, e-commerce platforms, and payment gateways.
2. **Data Preprocessing**: The data is cleaned, normalized, and transformed to ensure it is suitable for analysis.
3. **Model Training**: The XGBoost model is trained on a large dataset of labeled transactions, learning to distinguish between legitimate and fraudulent activities.
4. **Real-Time Detection**: The trained model is deployed to analyze incoming transactions in real-time, flagging potential fraud for further investigation.
5. **Visualization & Reporting**: Users can view detailed reports and visualizations of fraud predictions, enabling informed decision-making.
"""
)

# Call to action
st.header("Get Started")
st.write(
    """
Ready to experience the power of our Financial Fraud Detection System?
Navigate to the **Fraud Detection** page to upload your transaction data and start detecting fraud today!
"""
)

# Footer
st.markdown("---")
st.write(
    """
© 2024 Financial Fraud Detection System. All rights reserved.
"""
)
pages/project_details.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pages/project_details.py
2
+ import streamlit as st
3
+
# Project details page: static content declared as data, then rendered in order.
BANNER_IMAGE_URL = "https://images.unsplash.com/photo-1454165804606-c3d57bc86b40?ixlib=rb-1.2.1&auto=format&fit=crop&w=1950&q=80"

# (header, lead-in sentence, markdown list) for every section after the introduction.
DETAIL_SECTIONS = (
    (
        "Project Objectives",
        """
The primary objectives of the Financial Fraud Detection System are:
""",
        """
- **Real-Time Fraud Detection**: Detect fraudulent transactions as they occur, enabling immediate intervention.
- **High Accuracy**: Achieve a fraud detection accuracy rate of over 95% to minimize false positives and false negatives.
- **Scalability**: Handle large volumes of transactions efficiently, ensuring the system can scale with growing demand.
- **User-Friendly Interface**: Provide an intuitive and easy-to-use interface for financial analysts and decision-makers.
- **Continuous Learning**: Enable the system to adapt to new fraud patterns by continuously retraining the model with new data.
""",
    ),
    (
        "Methodology",
        """
Our methodology for developing the Financial Fraud Detection System involves the following steps:
""",
        """
1. **Data Collection**: Gather transaction data from various sources, including banks, e-commerce platforms, and payment gateways.
2. **Data Preprocessing**: Clean, normalize, and transform the data to ensure it is suitable for analysis.
3. **Feature Engineering**: Extract relevant features from the transaction data, such as transaction amount, frequency, and user behavior.
4. **Model Training**: Train the XGBoost machine learning model on a labeled dataset of transactions to distinguish between legitimate and fraudulent activities.
5. **Model Evaluation**: Evaluate the model's performance using metrics such as accuracy, precision, recall, and F1-score.
6. **Deployment**: Deploy the trained model in a production environment, enabling real-time fraud detection.
7. **Monitoring & Retraining**: Continuously monitor the system's performance and retrain the model with new data to adapt to evolving fraud patterns.
""",
    ),
    (
        "Technology Stack",
        """
The Financial Fraud Detection System is built using the following technologies:
""",
        """
- **Programming Language**: Python
- **Machine Learning Framework**: Scikit-learn, XGBoost
- **Data Processing**: Pandas, NumPy
- **Visualization**: Matplotlib, Seaborn, Plotly
- **Web Interface**: Streamlit
- **Model Serialization**: Joblib
- **Version Control**: Git
""",
    ),
    (
        "Key Features",
        """
The Financial Fraud Detection System offers the following key features:
""",
        """
- **Real-Time Processing**: Analyze transactions in real-time to detect fraud as it happens.
- **Batch Processing**: Upload and analyze bulk transaction data in CSV format.
- **Interactive Dashboard**: Visualize fraud detection results with interactive charts and graphs.
- **Fraud Probability Scores**: Provide a fraud risk score for each transaction, helping analysts prioritize investigations.
- **Decision Explainability**: Offer insights into why a transaction was flagged as fraudulent, enhancing transparency.
- **Scalable Architecture**: Designed to handle high volumes of transactions without performance degradation.
""",
    ),
    (
        "Future Enhancements",
        """
We are continuously working to improve the Financial Fraud Detection System. Some of the planned enhancements include:
""",
        """
- **Integration with Banking Systems**: Enable seamless integration with existing banking and payment systems for live fraud detection.
- **Advanced Feature Engineering**: Incorporate additional features such as behavioral analytics and device tracking to improve detection accuracy.
- **Automated Model Retraining**: Implement an automated pipeline for retraining the model with new data to adapt to evolving fraud patterns.
- **Mobile-Friendly Interface**: Develop a mobile-friendly version of the web interface for on-the-go fraud detection monitoring.
""",
    ),
)

# Page title and banner
st.title("Project Details")
st.image(BANNER_IMAGE_URL, use_column_width=True)

# Introduction section (header and paragraph only, no list)
st.header("Introduction")
st.write(
    """
The **Financial Fraud Detection System** is an advanced solution designed to identify and prevent fraudulent transactions in real-time.
With the increasing volume of online transactions, the need for a robust and scalable fraud detection system has become critical.
Our project leverages state-of-the-art machine learning techniques to provide accurate and efficient fraud detection, helping financial institutions and businesses minimize losses and enhance security.
"""
)

# Remaining sections all share the same header / lead-in / list layout
for section_title, lead_in, bullet_list in DETAIL_SECTIONS:
    st.header(section_title)
    st.write(lead_in)
    st.markdown(bullet_list)

# Footer
st.markdown("---")
st.write(
    """
© 2024 Financial Fraud Detection System. All rights reserved.
"""
)