AfshinMA committed on
Commit
a0a9b73
·
verified ·
1 Parent(s): 0aa8399

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +148 -153
app.py CHANGED
@@ -1,154 +1,149 @@
1
- import os
2
- import joblib
3
- import pandas as pd
4
- import streamlit as st
5
- from typing import Any, Dict, List
6
- from imblearn.over_sampling import SMOTE
7
- from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
8
- from sklearn.model_selection import train_test_split
9
- from sklearn.preprocessing import StandardScaler
10
-
11
- # Constants for directories and file names
12
- DIR = 'C:\\Users\\Afshin\\Desktop\\10_Projects\\Project_4_Fraud_Detection_Etherium\\'
13
- MODEL_DIR = os.path.join(DIR, 'models')
14
- DATA_DIR = os.path.join(DIR, 'datasets')
15
- DATA_FILE = 'cleaned_transaction_dataset.csv'
16
- MODEL_NAMES = [
17
- 'Ada Boost Classifier',
18
- 'Extra Trees Classifier',
19
- 'Gradient Boosting Classifier',
20
- 'LGBM Classifier',
21
- 'Random Forest Classifier',
22
- 'XGBoost Classifier',
23
- ]
24
-
25
- # Load dataset
26
- data_path = os.path.join(DATA_DIR, DATA_FILE)
27
- df = pd.read_csv(data_path)
28
-
29
- # Load models
30
- def load_models(model_names: List[str]) -> Dict[str, Any]:
31
- """Load machine learning models from disk."""
32
- models = {}
33
- for name in model_names:
34
- path = os.path.join(MODEL_DIR, f"{name.replace(' ', '')}.joblib")
35
- try:
36
- models[name] = joblib.load(path)
37
- except Exception as e:
38
- st.error(f"Error loading model {name}: {str(e)}")
39
- return models
40
-
41
- models = load_models(MODEL_NAMES)
42
-
43
- # Prepare features and target
44
- X = df.drop(columns=['FLAG'])
45
- y = df['FLAG']
46
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=123)
47
-
48
- # Prediction and metrics evaluation function
49
- def calculate_metrics(y_true, y_pred, average_type='binary'):
50
- """Calculate and return accuracy, recall, F1, and precision scores."""
51
- acc = accuracy_score(y_true, y_pred)
52
- rec = recall_score(y_true, y_pred, average=average_type)
53
- f1 = f1_score(y_true, y_pred, average=average_type)
54
- prec = precision_score(y_true, y_pred, average=average_type)
55
- return acc, rec, f1, prec
56
-
57
- def load_and_predict(sample):
58
- try:
59
- # Using StandardScaler to scale numric features
60
- scaler = StandardScaler()
61
- X_train_scaled = scaler.fit_transform(X_train)
62
- sample_trans = scaler.fit_transform(sample)
63
-
64
- # Using SMOTE to handle class imbalance
65
- X_resampled, y_resampled = SMOTE(random_state=123).fit_resample(X_train_scaled, y_train)
66
-
67
- results = []
68
- for name, model in models.items():
69
- y_resampled_pred = model.predict(X_resampled)
70
- flag_pred = model.predict(sample_trans)
71
- acc, rec, f1, prec = calculate_metrics(y_resampled, y_resampled_pred)
72
-
73
- results.append({
74
- 'Model': name,
75
- 'Predicted Fraud': 'Yes' if flag_pred[0] == 1 else 'No',
76
- 'Accuracy %': acc * 100,
77
- 'Recall %': rec * 100,
78
- 'F1 %': f1 * 100,
79
- 'Precision %': prec * 100
80
- })
81
-
82
- return pd.DataFrame(results).sort_values(by='Accuracy %', ascending=False)
83
-
84
- except Exception as e:
85
- st.error(f"An error occurred during model loading or prediction: {str(e)}")
86
- return pd.DataFrame()
87
-
88
- # Streamlit UI setup
89
- st.set_page_config(page_title="Fraud Detection Etherium Prediction App", page_icon="🕵️", layout="wide")
90
- st.title("😎 **Fraud Detection Etherium Prediction App**")
91
- st.subheader("Enter the following information to predict **Fraud Detection Etherium**.")
92
-
93
- st.sidebar.title("🕵️ **Fraud Detection Parameters**")
94
-
95
- # Input features
96
- input_features = {
97
- "Avg min between sent tnx": st.sidebar.number_input("Avg min between sent tnx", min_value=0.0, value=float(df["Avg min between sent tnx"].mean())),
98
- "Avg min between received tnx": st.sidebar.number_input("Avg min between received tnx", min_value=0.0, value=float(df["Avg min between received tnx"].mean())),
99
- "Time difference between first and last (mins)": st.sidebar.number_input("Time difference between first and last (mins)", min_value=0.0, value=float(df["Time difference between first and last (mins)"].mean())),
100
- "Sent tnx": st.sidebar.number_input("Sent tnx", min_value=0.0, value=float(df["Sent tnx"].mean())),
101
- "Received tnx": st.sidebar.number_input("Received tnx", min_value=0.0, value=float(df["Received tnx"].mean())),
102
- "Number of created contracts": st.sidebar.number_input("Number of created contracts", min_value=0, value=int(df["Number of created contracts"].mean())),
103
- "Max value received": st.sidebar.number_input("Max value received", min_value=0.0, value=float(df["Max value received"].mean())),
104
- "Avg value received": st.sidebar.number_input("Avg value received", min_value=0.0, value=float(df["Avg value received"].mean())),
105
- "Avg value sent": st.sidebar.number_input("Avg value sent", min_value=0.0, value=float(df["Avg value sent"].mean())),
106
- "Total either sent": st.sidebar.number_input("Total either sent", min_value=0.0, value=float(df["Total either sent"].mean())),
107
- "Total either balance": st.sidebar.number_input("Total either balance", min_value=0.0, value=float(df["Total either balance"].mean())),
108
- "ERC20 total either received": st.sidebar.number_input("ERC20 total either received", min_value=0.0, value=float(df["ERC20 total either received"].mean())),
109
- "ERC20 total either sent": st.sidebar.number_input("ERC20 total either sent", min_value=0.0, value=float(df["ERC20 total either sent"].mean())),
110
- "ERC20 total either sent contract": st.sidebar.number_input("ERC20 total either sent contract", min_value=0.0, value=float(df["ERC20 total either sent contract"].mean())),
111
- "ERC20 unique sent address": st.sidebar.number_input("ERC20 unique sent address", min_value=0.0, value=float(df["ERC20 unique sent address"].mean())),
112
- "ERC20 unique received token name": st.sidebar.number_input("ERC20 unique received token name", min_value=0.0, value=float(df["ERC20 unique received token name"].mean())),
113
- }
114
-
115
- # Display predict button in main area
116
- st.markdown("---")
117
- if st.button(label=':rainbow[Predict Fraud]'):
118
- # Prepare input data for prediction
119
- input_data = pd.DataFrame([input_features])
120
-
121
- # Predicting the input data
122
- results_df = load_and_predict(input_data)
123
-
124
- # Displaying results
125
- if not results_df.empty:
126
- st.write("### 😎 Prediction Results:")
127
- styled_df = results_df.style.map(lambda x: 'color: green' if x == 'Yes' else 'color: red', subset=['Predicted Fraud'])
128
- st.dataframe(styled_df)
129
-
130
- # Description Section
131
- st.markdown("---")
132
- st.subheader("Description")
133
- st.markdown('''This Streamlit application predicts fraud in Ethereum transactions using multiple machine learning models including LGBM, XGBoost, and Gradient Boosting classifiers.
134
- Users can input transaction information through a user-friendly interface, which includes various fields related to transaction metrics and user activity.
135
-
136
- > **Features:**
137
- > - **Input Components:** Users can provide data using number inputs for transaction-related features.
138
- > - **Data Processing:** Upon submitting the form, the app processes the input data and transforms it using a pre-trained data preprocessor.
139
- > - It leverages SMOTE to address any class imbalance in the data.
140
- > - **Prediction:** The app runs predictions using the loaded models and calculates performance metrics like accuracy, recall, F1 score, and precision.
141
- > - **Results Display:** The predicted fraud status and model performance metrics are displayed in a formatted output for easy interpretation.
142
-
143
- > **Usage:** Just fill out the information about the transaction and click "Predict Fraud" to receive insights on whether the transaction is likely to be fraudulent and how well each model performed.
144
-
145
- > **Disclaimer:** This application is intended for educational purposes only.
146
- ''')
147
-
148
- # Disclaimer Section
149
- st.markdown("---")
150
- st.subheader("Disclaimer")
151
- st.text('''The fraud detection results provided by this app are for informational purposes only.
152
- While we strive for accuracy, the predictions made by the models depend on the quality of the input data
153
- and the model's training. Use this information at your own discretion, and do not solely rely on it for
154
  making financial decisions. Consulting with a financial expert is recommended for critical decisions.''')
 
1
+ import os
2
+ import joblib
3
+ import pandas as pd
4
+ import streamlit as st
5
+ from typing import Any, Dict, List
6
+ from imblearn.over_sampling import SMOTE
7
+ from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
8
+ from sklearn.model_selection import train_test_split
9
+ from sklearn.preprocessing import StandardScaler
10
+
11
+ # Constants for directories and file names
12
+ MODEL_DIR = 'models'
13
+ DATA_DIR = 'datasets'
14
+ DATA_FILE = 'cleaned_transaction_dataset.csv'
15
+ MODEL_NAMES = [
16
+ 'LGBM Classifier',
17
+ 'XGBoost Classifier',
18
+ ]
19
+
20
+ # Load dataset
21
+ data_path = os.path.join(DATA_DIR, DATA_FILE)
22
+ df = pd.read_csv(data_path)
23
+
24
+ # Load models
25
def load_models(model_names: List[str]) -> Dict[str, Any]:
    """Read the requested models from ``MODEL_DIR``.

    Each display name is mapped to ``<name without spaces>.joblib`` inside
    ``MODEL_DIR`` and loaded with ``joblib.load``.

    Args:
        model_names: Display names of the models to load.

    Returns:
        A dict mapping each display name to its loaded object. A model that
        fails to load is reported through ``st.error`` and left out of the
        dict; the remaining models are still attempted.
    """
    loaded: Dict[str, Any] = {}
    for display_name in model_names:
        # File names are the display names with the spaces stripped out.
        file_name = f"{display_name.replace(' ', '')}.joblib"
        model_path = os.path.join(MODEL_DIR, file_name)
        try:
            estimator = joblib.load(model_path)
        except Exception as exc:
            # One broken file should not prevent the other models from loading.
            st.error(f"Error loading model {display_name}: {str(exc)}")
        else:
            loaded[display_name] = estimator
    return loaded
35
+
36
+ models = load_models(MODEL_NAMES)
37
+
38
+ # Prepare features and target
39
+ X = df.drop(columns=['FLAG'])
40
+ y = df['FLAG']
41
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=123)
42
+
43
+ # Prediction and metrics evaluation function
44
def calculate_metrics(y_true, y_pred, average_type='binary'):
    """Score predictions against the true labels.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        average_type: Forwarded as the ``average`` argument of the recall,
            F1 and precision scorers.

    Returns:
        A tuple ``(accuracy, recall, f1, precision)``.
    """
    accuracy = accuracy_score(y_true, y_pred)
    averaging = {'average': average_type}
    recall = recall_score(y_true, y_pred, **averaging)
    f1_value = f1_score(y_true, y_pred, **averaging)
    precision = precision_score(y_true, y_pred, **averaging)
    return accuracy, recall, f1_value, precision
51
+
52
def load_and_predict(sample):
    """Classify ``sample`` with every loaded model and report per-model metrics.

    The module-level ``X_train`` is standardised with a ``StandardScaler`` and
    balanced with SMOTE. Each model in the module-level ``models`` dict then
    predicts on that resampled set (for the metric columns) and on ``sample``
    (for the fraud verdict).

    Args:
        sample: Feature row(s) for the transaction to classify. Only the
            prediction for the first row is reported. A ``DataFrame`` is
            re-ordered to the training column order before scaling.

    Returns:
        A ``DataFrame`` with one row per model and the columns ``Model``,
        ``Predicted Fraud``, ``Accuracy %``, ``Recall %``, ``F1 %`` and
        ``Precision %``, sorted by accuracy in descending order. An empty
        ``DataFrame`` is returned, after reporting through ``st.error``, when
        no model is loaded or when any step raises.
    """
    if not models:
        # Without this guard the empty result list fails in sort_values and
        # the user only sees a cryptic "'Accuracy %'" KeyError message.
        st.error("No models are loaded, so no prediction can be made.")
        return pd.DataFrame()

    try:
        # Fit the scaler on the training features only.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)

        # Line the sample's columns up with the training columns so each value
        # is scaled with the statistics of its own feature.
        if isinstance(sample, pd.DataFrame):
            sample = sample[list(X_train.columns)]

        # Reuse the training statistics. Refitting on the sample itself
        # (fit_transform) would centre a single row on its own mean and turn
        # every feature into 0, making the prediction independent of the input.
        sample_trans = scaler.transform(sample)

        # Using SMOTE to handle class imbalance
        X_resampled, y_resampled = SMOTE(random_state=123).fit_resample(X_train_scaled, y_train)

        # NOTE(review): these metrics are computed on resampled training data,
        # not on the held-out X_test/y_test split - confirm this is intended.
        results = []
        for name, model in models.items():
            y_resampled_pred = model.predict(X_resampled)
            flag_pred = model.predict(sample_trans)
            acc, rec, f1, prec = calculate_metrics(y_resampled, y_resampled_pred)

            results.append({
                'Model': name,
                'Predicted Fraud': 'Yes' if flag_pred[0] == 1 else 'No',
                'Accuracy %': acc * 100,
                'Recall %': rec * 100,
                'F1 %': f1 * 100,
                'Precision %': prec * 100
            })

        return pd.DataFrame(results).sort_values(by='Accuracy %', ascending=False)

    except Exception as e:
        # UI boundary: report the failure in the page, do not crash the app.
        st.error(f"An error occurred during model loading or prediction: {str(e)}")
        return pd.DataFrame()
82
+
83
+ # Streamlit UI setup
84
+ st.set_page_config(page_title="Fraud Detection Etherium Prediction App", page_icon="🕵️", layout="wide")
85
+ st.title("😎 **Fraud Detection Etherium Prediction App**")
86
+ st.subheader("Enter the following information to predict **Fraud Detection Etherium**.")
87
+
88
+ st.sidebar.title("🕵️ **Fraud Detection Parameters**")
89
+
90
+ # Input features
91
+ input_features = {
92
+ "Avg min between sent tnx": st.sidebar.number_input("Avg min between sent tnx", min_value=0.0, value=float(df["Avg min between sent tnx"].mean())),
93
+ "Avg min between received tnx": st.sidebar.number_input("Avg min between received tnx", min_value=0.0, value=float(df["Avg min between received tnx"].mean())),
94
+ "Time difference between first and last (mins)": st.sidebar.number_input("Time difference between first and last (mins)", min_value=0.0, value=float(df["Time difference between first and last (mins)"].mean())),
95
+ "Sent tnx": st.sidebar.number_input("Sent tnx", min_value=0.0, value=float(df["Sent tnx"].mean())),
96
+ "Received tnx": st.sidebar.number_input("Received tnx", min_value=0.0, value=float(df["Received tnx"].mean())),
97
+ "Number of created contracts": st.sidebar.number_input("Number of created contracts", min_value=0, value=int(df["Number of created contracts"].mean())),
98
+ "Max value received": st.sidebar.number_input("Max value received", min_value=0.0, value=float(df["Max value received"].mean())),
99
+ "Avg value received": st.sidebar.number_input("Avg value received", min_value=0.0, value=float(df["Avg value received"].mean())),
100
+ "Avg value sent": st.sidebar.number_input("Avg value sent", min_value=0.0, value=float(df["Avg value sent"].mean())),
101
+ "Total either sent": st.sidebar.number_input("Total either sent", min_value=0.0, value=float(df["Total either sent"].mean())),
102
+ "Total either balance": st.sidebar.number_input("Total either balance", min_value=0.0, value=float(df["Total either balance"].mean())),
103
+ "ERC20 total either received": st.sidebar.number_input("ERC20 total either received", min_value=0.0, value=float(df["ERC20 total either received"].mean())),
104
+ "ERC20 total either sent": st.sidebar.number_input("ERC20 total either sent", min_value=0.0, value=float(df["ERC20 total either sent"].mean())),
105
+ "ERC20 total either sent contract": st.sidebar.number_input("ERC20 total either sent contract", min_value=0.0, value=float(df["ERC20 total either sent contract"].mean())),
106
+ "ERC20 unique sent address": st.sidebar.number_input("ERC20 unique sent address", min_value=0.0, value=float(df["ERC20 unique sent address"].mean())),
107
+ "ERC20 unique received token name": st.sidebar.number_input("ERC20 unique received token name", min_value=0.0, value=float(df["ERC20 unique received token name"].mean())),
108
+ }
109
+
110
+ # Display predict button in main area
111
+ st.markdown("---")
112
+ if st.button(label=':rainbow[Predict Fraud]'):
113
+ # Prepare input data for prediction
114
+ input_data = pd.DataFrame([input_features])
115
+
116
+ # Predicting the input data
117
+ results_df = load_and_predict(input_data)
118
+
119
+ # Displaying results
120
+ if not results_df.empty:
121
+ st.write("### 😎 Prediction Results:")
122
+ styled_df = results_df.style.map(lambda x: 'color: green' if x == 'Yes' else 'color: red', subset=['Predicted Fraud'])
123
+ st.dataframe(styled_df)
124
+
125
+ # Description Section
126
+ st.markdown("---")
127
+ st.subheader("Description")
128
+ st.markdown('''This Streamlit application predicts fraud in Ethereum transactions using multiple machine learning models including LGBM, XGBoost, and Gradient Boosting classifiers.
129
+ Users can input transaction information through a user-friendly interface, which includes various fields related to transaction metrics and user activity.
130
+
131
+ > **Features:**
132
+ > - **Input Components:** Users can provide data using number inputs for transaction-related features.
133
+ > - **Data Processing:** Upon submitting the form, the app processes the input data and transforms it using a pre-trained data preprocessor.
134
+ > - It leverages SMOTE to address any class imbalance in the data.
135
+ > - **Prediction:** The app runs predictions using the loaded models and calculates performance metrics like accuracy, recall, F1 score, and precision.
136
+ > - **Results Display:** The predicted fraud status and model performance metrics are displayed in a formatted output for easy interpretation.
137
+
138
+ > **Usage:** Just fill out the information about the transaction and click "Predict Fraud" to receive insights on whether the transaction is likely to be fraudulent and how well each model performed.
139
+
140
+ > **Disclaimer:** This application is intended for educational purposes only.
141
+ ''')
142
+
143
+ # Disclaimer Section
144
+ st.markdown("---")
145
+ st.subheader("Disclaimer")
146
+ st.text('''The fraud detection results provided by this app are for informational purposes only.
147
+ While we strive for accuracy, the predictions made by the models depend on the quality of the input data
148
+ and the model's training. Use this information at your own discretion, and do not solely rely on it for
 
 
 
 
 
149
  making financial decisions. Consulting with a financial expert is recommended for critical decisions.''')