Girish1432 committed on
Commit
00a3bde
·
verified ·
1 Parent(s): d5c3f44

Upload 4 files

Browse files
Files changed (4) hide show
  1. constants.py +190 -0
  2. create_transactions.py +158 -0
  3. data_utils.py +155 -0
  4. db_utils.py +75 -0
constants.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime, timedelta
2
+ import numpy as np
3
+
4
# SQLite database file, and the name of the table that holds the transactions
# (DB_NAME is used as the table name in the SQL built by db_utils).
DB_PATH = "sqlite.db"
DB_NAME = "transactions"

# Parameters
NUM_USERS = 10
NUM_TRANSACTIONS = 100000
# Read the clock once so the simulated window is exactly 360 days wide.
END_DATE = datetime.now()
START_DATE = END_DATE - timedelta(days=360)

# Generate user IDs
USER_IDS = [f"user_{i}" for i in range(1, NUM_USERS + 1)]

# IP addresses and ISPs (for simplicity)
IP_ADDRESSES = [f"192.168.1.{i}" for i in range(1, 101)]
ISPs = ["ISP_A", "ISP_B", "ISP_C", "ISP_D", "ISP_E"]

# Device and browser types
DEVICE_TYPES = ["Tablet", "PC", "Mobile"]
BROWSER_TYPES = ["Chrome", "Safari", "Firefox", "Edge", "Opera"]

# Payee types
PAYEE_TYPES = ["Individual", "Business or corporations", "Financial agency", "Charity", "Educational Institute"]

# Authentication levels
AUTH_LEVELS = ["Low", "Medium", "High"]

# Transaction statuses
STATUSES = ["pending", "completed", "failed"]

# Payment methods
PAYMENT_METHODS = ["Credit Card", "Debit Card", "PayPal", "Bank Transfer", "Cryptocurrency"]

# Transaction categories
CATEGORIES = ["groceries", "utilities", "entertainment", "travel", "healthcare", "education", "shopping", "other"]

# Transaction types
TRANSACTION_TYPES = ["credit", "debit"]
41
+
42
# Merchant names available for each transaction category.
# Each category lists (name prefix, count) pairs; merchants are numbered from 1
# and the groups are concatenated in the order given.
MERCHANT_PAYEE_MAPPING = {
    category: [
        f"{prefix}_{number}"
        for prefix, count in prefix_counts
        for number in range(1, count + 1)
    ]
    for category, prefix_counts in (
        ("groceries", (("Supermarket", 20),)),
        ("utilities", (("UtilityCompany", 10),)),
        ("entertainment", (("EntertainmentVenue", 20),)),
        ("travel", (("TravelAgency", 10), ("Airline", 10))),
        ("healthcare", (("Hospital", 10), ("Clinic", 10), ("Pharmacy", 10))),
        ("education", (("University", 10), ("School", 10), ("Bookstore", 10))),
        ("shopping", (("Mall", 10), ("ElectronicsStore", 10), ("ClothingStore", 10))),
        ("other", (("ServiceProvider", 10), ("Consultant", 10), ("Freelancer", 10))),
    )
}
69
+
70
+ # Generate user profile mappings
71
# One "usual" IP address, device, browser and ISP per user, drawn once at import time.
# In each draw the first option of the list has probability 0.9 and the remaining
# 0.1 is split evenly across the other options.
USER_PROFILES = {
    user_id: {
        field: np.random.choice(
            options,
            p=[0.9] + [0.1 / (len(options) - 1)] * (len(options) - 1),
        )
        for field, options in (
            ("IPAddress", IP_ADDRESSES),
            ("DeviceType", DEVICE_TYPES),
            ("BrowserType", BROWSER_TYPES),
            ("ISP", ISPs),
        )
    }
    for user_id in USER_IDS
}
86
+
87
+ # Columns
88
# Name of the target column.
LABEL_COLUMN = "isFraud"

# SQLite storage type for every raw column, listed in table order.
RAW_DATA_COLUMN_TYPES = {
    "TransactionId": "TEXT",
    "UserId": "TEXT",
    "TransactionTimestamp": "TEXT",
    "TransactionAmount": "REAL",
    "LagAmount": "REAL",
    "IPAddress": "TEXT",
    "DeviceType": "TEXT",
    "BrowserType": "TEXT",
    "PayeeType": "TEXT",
    "ISP": "TEXT",
    "OTP": "INTEGER",
    "AuthenticationLevel": "TEXT",
    "Status": "TEXT",
    "PaymentMethod": "TEXT",
    "Category": "TEXT",
    "Merchant": "TEXT",
    "TransactionType": "TEXT",
    "isFraud": "INTEGER",
}

# Raw column names, in the same order as the type mapping above.
RAW_DATA_COLUMNS = list(RAW_DATA_COLUMN_TYPES)
130
# Raw columns that are expanded into "<column>_<level>" indicator features.
CATEGORICAL_COLS = [
    "DeviceType",
    "BrowserType",
    "PayeeType",
    "ISP",
    "Status",
    "PaymentMethod",
    "Category",
    "TransactionType",
]

# Model input columns: numeric and "known" flags first, then the indicator
# columns for each categorical column, then AuthenticationLevel last.
FEATURES = [
    "TransactionAmount",
    "LagAmount",
    "OTP",
    "DayOfMonth",
    "DayOfWeek",
    "HourOfDay",
    "TimeSinceLastTx",
    "KnownDeviceType",
    "KnownBrowserType",
    "KnownIP",
    "KnownISP",
    *(
        f"{column}_{level}"
        for column, levels in (
            ("DeviceType", ("Mobile", "PC", "Tablet")),
            ("BrowserType", ("Chrome", "Edge", "Firefox", "Opera", "Safari")),
            (
                "PayeeType",
                ("Business or corporations", "Charity", "Educational Institute", "Financial agency", "Individual"),
            ),
            ("ISP", ("ISP_A", "ISP_B", "ISP_C", "ISP_D", "ISP_E")),
            ("Status", ("completed", "failed", "pending")),
            ("PaymentMethod", ("Bank Transfer", "Credit Card", "Cryptocurrency", "Debit Card", "PayPal")),
            (
                "Category",
                (
                    "education",
                    "entertainment",
                    "groceries",
                    "healthcare",
                    "other",
                    "shopping",
                    "travel",
                    "utilities",
                ),
            ),
            ("TransactionType", ("credit", "debit")),
        )
        for level in levels
    ),
    "AuthenticationLevel",
]

# Columns listed for scaling.
SCALED_COLUMNS = ["DayOfMonth", "DayOfWeek", "HourOfDay", "TimeSinceLastTx", "TransactionAmount", "LagAmount"]
create_transactions.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import uuid
3
+ from datetime import timedelta
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+ from constants import (
9
+ RAW_DATA_COLUMNS,
10
+ START_DATE,
11
+ END_DATE,
12
+ MERCHANT_PAYEE_MAPPING,
13
+ PAYEE_TYPES,
14
+ NUM_TRANSACTIONS,
15
+ USER_IDS,
16
+ IP_ADDRESSES,
17
+ ISPs,
18
+ AUTH_LEVELS,
19
+ STATUSES,
20
+ CATEGORIES,
21
+ PAYMENT_METHODS,
22
+ TRANSACTION_TYPES,
23
+ DEVICE_TYPES,
24
+ BROWSER_TYPES,
25
+ USER_PROFILES,
26
+ )
27
+ from db_utils import insert_multiple_transactions, delete_all_transactions
28
+ from fraud.constants import LABEL_COLUMN
29
+ from train import prepare_train_data
30
+
31
+
32
+ # Function to generate random timestamps
33
def random_date(start, end):
    """Return a random datetime between *start* and *end* (both inclusive).

    The result is *start* plus a whole number of seconds; any fractional
    second in the span between the two bounds is truncated.
    """
    span_seconds = int((end - start).total_seconds())
    offset_seconds = random.randint(0, span_seconds)
    return start + timedelta(seconds=offset_seconds)
35
+
36
+
37
+ # Function to get merchant based on category
38
def get_merchant_and_payee(category):
    """Pick a random merchant for *category* and derive its payee type.

    Returns a ``(merchant, payee_type)`` tuple. The payee type is chosen by
    looking for known keywords inside the merchant name; merchants matching
    no keyword get a random entry of PAYEE_TYPES.
    """
    merchant = random.choice(MERCHANT_PAYEE_MAPPING[category])

    # Ordered rules: the first rule with a keyword contained in the name wins.
    keyword_rules = (
        (("Supermarket", "Mall", "Store"), "Business or corporations"),
        (("UtilityCompany",), "Financial agency"),
        (("Hospital", "Clinic", "Pharmacy"), "Individual"),
        (("University", "School", "Bookstore"), "Educational Institute"),
        (("ServiceProvider", "Consultant", "Freelancer"), "Individual"),
    )
    for keywords, matched_payee_type in keyword_rules:
        if any(keyword in merchant for keyword in keywords):
            return merchant, matched_payee_type

    # No keyword matched: fall back to a random payee type.
    return merchant, random.choice(PAYEE_TYPES)
54
+
55
+
56
+ # Function to simulate a transaction
57
def generate_transaction(user_id):
    """Simulate one transaction for *user_id*.

    Returns a list with one value per raw column, in this order:
    transaction id, user id, timestamp, amount, lag amount, IP address,
    device type, browser type, payee type, ISP, OTP flag, authentication
    level, status, payment method, category, merchant, transaction type and
    the initial fraud flag.

    Device, browser, ISP and IP address follow the user's profile 90% of the
    time and are drawn uniformly otherwise. NumPy scalars are converted to
    native ``str``/``int`` so the row contains only plain Python values.
    """
    profile = USER_PROFILES[user_id]

    def usual_or_random(field, options):
        # Use the profile value with probability 0.9, else any option uniformly.
        if np.random.rand() < 0.9:
            return str(profile[field])
        return str(np.random.choice(options))

    device_type = usual_or_random("DeviceType", DEVICE_TYPES)
    browser_type = usual_or_random("BrowserType", BROWSER_TYPES)
    isp = usual_or_random("ISP", ISPs)
    transaction_id = str(uuid.uuid4())
    transaction_timestamp = random_date(START_DATE, END_DATE)
    amount = round(random.uniform(1, 10000), 2)  # Transaction amount between $1 and $10000
    lag_amount = round(amount * random.uniform(0.5, 1.5), 2)  # Lag amount as a factor of transaction amount
    # The IP address previously ignored the profile; treat it like the other profile fields.
    ip_address = usual_or_random("IPAddress", IP_ADDRESSES)
    otp = int(np.random.binomial(1, 0.85))  # 85% of transactions use OTP
    auth_level = random.choice(AUTH_LEVELS)
    is_fraud = int(np.random.binomial(1, 0.02))  # Assume 2% transactions are fraudulent
    status = random.choice(STATUSES)
    payment_method = random.choice(PAYMENT_METHODS)
    category = random.choice(CATEGORIES)
    merchant, payee_type = get_merchant_and_payee(category)
    transaction_type = random.choice(TRANSACTION_TYPES)

    return [
        transaction_id,
        user_id,
        transaction_timestamp,
        amount,
        lag_amount,
        ip_address,
        device_type,
        browser_type,
        payee_type,
        isp,
        otp,
        auth_level,
        status,
        payment_method,
        category,
        merchant,
        transaction_type,
        is_fraud,
    ]
95
+
96
+
97
def update_fraud_status(df):
    """Re-draw the fraud label of every row from a rule-based probability.

    The starting probability is the row's current label value. It is raised
    for large amounts, quick repeat transactions and unknown IP / device /
    browser / ISP, lowered for OTP use and by authentication level, clamped
    to [0, 1], and then used to sample a 0/1 label.

    Returns a list with one sampled label per row, in row order.
    """
    unknown_flag_columns = ("KnownIP", "KnownDeviceType", "KnownBrowserType", "KnownISP")
    sampled_labels = []

    for _, record in df.iterrows():
        probability = record[LABEL_COLUMN]

        if record["TransactionAmount"] > 6000:
            probability += 0.3
        if record["TimeSinceLastTx"] < 3600:
            probability += 0.1

        # Each attribute flagged as not previously known adds the same penalty.
        for flag_column in unknown_flag_columns:
            if record[flag_column] == 0:
                probability += 0.1

        if record["OTP"]:
            probability -= 0.3

        # NOTE(review): assumes AuthenticationLevel is numeric by this point - confirm upstream encoding.
        auth_level = record["AuthenticationLevel"]
        if auth_level > 2:
            discount = 0.2
        elif auth_level > 1:
            discount = 0.1
        else:
            discount = 0.05
        probability -= discount

        # Clamp into the valid probability range.
        probability = min(max(probability, 0), 1)

        sampled_labels.append(np.random.choice([0, 1], p=[1 - probability, probability]))

    return sampled_labels
135
+
136
+
137
def main():
    """Generate NUM_TRANSACTIONS synthetic transactions, relabel fraud and store them.

    Existing rows are kept: delete_all_transactions() is deliberately not
    called here, and the data is written to the database rather than to CSV.
    """
    # Generate data, each transaction for a randomly chosen user.
    rows = []
    for _ in range(NUM_TRANSACTIONS):
        rows.append(generate_transaction(user_id=random.choice(USER_IDS)))

    # Create DataFrame
    df = pd.DataFrame(rows, columns=RAW_DATA_COLUMNS)

    # Update fraud status using the engineered features of a copy of the raw data.
    engineered, _unused = prepare_train_data(df.copy())
    # NOTE(review): labels are assigned by position; this assumes prepare_train_data
    # keeps the row order and row count of its input - confirm.
    df[LABEL_COLUMN] = update_fraud_status(engineered)

    # Save
    insert_multiple_transactions(df)


if __name__ == "__main__":
    main()
data_utils.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from datetime import datetime
3
+
4
+ import joblib
5
+ import pandas as pd
6
+ import requests
7
+
8
# Load the trained model and the feature names it was trained with, once, at import time.
# NOTE(review): joblib.load unpickles "model.pkl", which can run arbitrary code - only load a trusted file.
model, feature_names = joblib.load("model.pkl")
10
+
11
+
12
+ # Function to fetch data from final_data.csv based on user input
13
def fetch_data(user_id, txn_amount):
    """Return the rows of ``final_data.csv`` matching both *user_id* and *txn_amount*.

    The CSV is read on every call; matching is by exact equality on the
    ``user_id`` and ``TxnAmount($)`` columns.
    """
    table = pd.read_csv("final_data.csv")
    same_user = table["user_id"] == user_id
    same_amount = table["TxnAmount($)"] == txn_amount
    return table[same_user & same_amount]
17
+
18
+
19
+ # Define the prediction function
20
def predict_fraud(input_data):
    """Return the model's prediction for the first row of *input_data*.

    The frame is first reindexed to the module-level ``feature_names`` so
    columns are in that order; columns missing from the input are filled
    with 0 and extra columns are dropped.
    """
    aligned = input_data.reindex(columns=feature_names, fill_value=0)
    return model.predict(aligned)[0]
26
+
27
+
28
+ # Function to handle real-time data and make predictions
29
def handle_real_time_data(real_time_data):
    """Predict each row of *real_time_data* and store the results in a ``Prediction`` column.

    Rows are predicted one at a time via predict_fraud. The input frame is
    modified in place and also returned.
    """
    real_time_data["Prediction"] = [
        predict_fraud(pd.DataFrame([record])) for _, record in real_time_data.iterrows()
    ]
    return real_time_data
37
+
38
+
39
def fetch_test_data():
    """Read ``test.csv`` as a stand-in for a real-time transaction feed."""
    test_frame = pd.read_csv("test.csv")  # Example CSV file for real-time transactions
    return test_frame
42
+
43
+
44
+ # Function to determine time of day
45
def time_of_day(hour, minute, second):
    """Map an hour of the day to its encoded time-slot value.

    Slots by hour: [5, 11) -> 0, [11, 13) -> 0.166666667,
    [13, 15) -> 0.333333333, [15, 17) -> 0.5, [17, 19) -> 0.666666667,
    [19, 24) -> 0.833333333 and [0, 5) -> 1.

    *minute* and *second* are accepted for interface compatibility but do
    not affect the result: the slot is decided by *hour* alone.

    Returns the string ``"Invalid time"`` when *hour* is outside [0, 24).
    """
    if not 0 <= hour < 24:
        return "Invalid time"
    if hour < 5:
        return 1
    if hour < 11:
        return 0
    if hour < 13:
        return 0.166666667
    if hour < 15:
        return 0.333333333
    if hour < 17:
        return 0.5
    if hour < 19:
        return 0.666666667
    return 0.833333333
74
+
75
+
76
def payee_type(payee):
    """Return the numeric encoding of a payee type name.

    Returns the string ``"Invalid Payee Type"`` when *payee* is not one of
    the five known names.
    """
    encoded_payees = (
        ("Individual", 0),
        ("Business or corporations", 0.369070247),
        ("Financial agency", 0.630929753),
        ("Charity", 0.834043767),
        ("Educational Institute", 1),
    )
    for known_name, encoded_value in encoded_payees:
        if known_name == payee:
            return encoded_value
    return "Invalid Payee Type"
83
+
84
+
85
+ # Get current location details including latitude and longitude
86
def get_location():
    """Look up the current location from https://ipinfo.io/.

    Returns a ``(city, region, country, ip, latitude, longitude)`` tuple
    taken from the JSON response, with latitude and longitude as the two
    comma-separated parts of its ``loc`` field. On any failure the error is
    printed and a tuple of six ``None`` values is returned.
    """
    try:
        # A timeout keeps an unresponsive endpoint from blocking the caller indefinitely.
        res = requests.get("https://ipinfo.io/", timeout=10)
        # Surface HTTP error statuses explicitly instead of failing later on a missing key.
        res.raise_for_status()
        data = res.json()
        loc = data["loc"].split(",")
        latitude = loc[0]
        longitude = loc[1]
        return data["city"], data["region"], data["country"], data["ip"], latitude, longitude
    except Exception as e:
        # Deliberate best-effort: callers get Nones rather than an exception.
        print("Unable to retrieve location:", e)
        return None, None, None, None, None, None
97
+
98
+
99
+ # Function to generate synthetic data
100
def generate_synthetic_data(user_id, txn_amount):
    """Build a one-row DataFrame of synthetic features for one transaction.

    *user_id* and *txn_amount* are copied into the row; every other feature
    is drawn at random, except ``TimeSlots`` which is derived from the
    current local time.
    """
    # Define the feature values based on model requirements
    lag_amount1 = random.randint(50, 200)  # Random lag_amount1
    average_amount_3txns = random.randint(50, 200)  # Random average amount of last 3 transactions
    time_since_last_txn = random.randint(1, 60)  # Random time since last transaction (in minutes)
    velocity_txns = random.randint(1, 10)  # Random velocity of transactions
    weekday = random.choice([0, 1, 2, 3, 4, 5, 6])  # Random weekday encoded as numerical value
    ip_used_known = random.choice([0, 1])  # Random IP used known
    # Read the clock once so hour, minute and second all describe the same instant.
    now = datetime.now()
    time_slots = time_of_day(now.hour, now.minute, now.second)  # Time of day based on current time
    known_device_browser = random.choice([0, 1])  # Random known device and browser
    payee_type_value = payee_type(
        random.choice(
            ["Individual", "Business or corporations", "Financial agency", "Charity", "Educational Institute"]
        )
    )  # Random payee type value
    time_since_payee_regn = random.randint(1, 365)  # Random time since payee registration (in days)
    known_location = random.choice([0, 1])  # Random known location
    otp = random.choice([0, 1])  # Random OTP
    authentication_level = random.choice([0, 1, 2])  # Random authentication level
    time_limit = random.choice([0, 1])  # Random time limit
    browser_known = random.choice([0, 1])  # Random browser known
    location_change = random.choice([0, 1])  # Random location change

    # Create a DataFrame with the generated synthetic data
    synthetic_data = pd.DataFrame(
        {
            "user_id": [user_id],
            "TxnAmount($)": [txn_amount],
            "LagAmount1": [lag_amount1],
            "AverageAmount(Last3txns)": [average_amount_3txns],
            "TimeSinceLastTxn": [time_since_last_txn],
            "VelocityTxns": [velocity_txns],
            "WeekDay": [weekday],
            "IPUsedKnown": [ip_used_known],
            "TimeSlots": [time_slots],
            "KnownDeviceBrowser": [known_device_browser],
            "PayeeType": [payee_type_value],
            "Timesincepayeeregn": [time_since_payee_regn],
            "KnownLocation": [known_location],
            "OTP": [otp],
            "AuthenticationLevel": [authentication_level],
            "Timelimit": [time_limit],
            "BrowserKnown": [browser_known],
            "location_change": [location_change],
        }
    )

    return synthetic_data
150
+
151
+
152
+ # Function to simulate fetching real-time data
153
def fetch_real_time_data(user_id, txn_amount):
    """Stand in for a real-time feed by returning synthetic data for the transaction."""
    # Generating synthetic data instead of reading from CSV or calling an API.
    synthetic_row = generate_synthetic_data(user_id, txn_amount)
    return synthetic_row
db_utils.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sqlite3
2
+
3
+ import pandas as pd
4
+
5
+ from constants import DB_PATH, DB_NAME, RAW_DATA_COLUMNS, RAW_DATA_COLUMN_TYPES
6
+
7
+
8
def get_connection():
    """Open the SQLite database at DB_PATH and return ``(connection, cursor)``.

    The database file is created if it does not exist. Closing the
    connection is the caller's responsibility.
    """
    connection = sqlite3.connect(DB_PATH)
    return connection, connection.cursor()
13
+
14
+
15
def execute_query(query, params=()):
    """Execute one SQL statement on a fresh connection and commit it.

    *params* are bound to ``?`` placeholders in *query*; the default binds
    nothing, matching the previous one-argument behaviour. The connection is
    closed even when execution or commit fails, in which case the error
    propagates and nothing is committed.
    """
    print(f"Executing query: {repr(query)}")
    db, cursor = get_connection()
    try:
        cursor.execute(query, params)
        cursor.close()
        # Commit the changes
        db.commit()
    finally:
        db.close()
23
+
24
+
25
def create_transaction_table():
    """Create the transactions table if it does not already exist.

    One column is declared per entry of RAW_DATA_COLUMNS, typed according to
    RAW_DATA_COLUMN_TYPES.
    """
    # Join the definitions instead of appending ", " and slicing the tail off,
    # which would cut into the statement itself if the column list were empty.
    column_definitions = ", ".join(f"{col} {RAW_DATA_COLUMN_TYPES[col]}" for col in RAW_DATA_COLUMNS)
    query = f"CREATE TABLE IF NOT EXISTS {DB_NAME} ({column_definitions})"
    execute_query(query)
33
+
34
+
35
def insert_multiple_transactions(df):
    """Append every row of *df* to the transactions table, creating the table first if needed.

    The DataFrame index is not written. The connection is closed even when
    the insert fails, in which case the error propagates.
    """
    create_transaction_table()
    db, _ = get_connection()
    try:
        df.to_sql(DB_NAME, db, if_exists="append", index=False)
        db.commit()
    finally:
        db.close()
41
+
42
+
43
# Function to insert a single transaction into the transactions table
def insert_single_transaction(transaction):
    """Insert one transaction, given as a ``{column: value}`` mapping.

    ``TransactionTimestamp`` is stored as its string form. Values are bound
    as SQL parameters instead of being formatted into the statement, so
    quotes, ``None`` and single-column rows are handled correctly. The
    caller's mapping is not modified.

    Raises KeyError if ``TransactionTimestamp`` is missing.
    """
    row = dict(transaction)
    row["TransactionTimestamp"] = str(row["TransactionTimestamp"])

    # Quote column names as identifiers; embedded double quotes are doubled.
    columns = ", ".join('"' + str(name).replace('"', '""') + '"' for name in row)
    placeholders = ", ".join("?" for _ in row)
    query = f"INSERT INTO {DB_NAME} ({columns}) VALUES ({placeholders})"
    # Objects exposing .item() (e.g. NumPy scalars) are unwrapped to plain Python values for binding.
    values = [value.item() if hasattr(value, "item") else value for value in row.values()]

    print(f"Executing query: {repr(query)}")
    db, cursor = get_connection()
    try:
        cursor.execute(query, values)
        cursor.close()
        db.commit()
    finally:
        db.close()
48
+
49
+
50
def execute_return_query(query, params=()):
    """Execute one SQL statement on a fresh connection and return all fetched rows.

    *params* are bound to ``?`` placeholders in *query*; the default binds
    nothing, matching the previous one-argument behaviour. Changes are
    committed before returning. The connection is closed even when
    execution fails, in which case the error propagates.
    """
    db, cursor = get_connection()
    try:
        cursor.execute(query, params)
        rows = cursor.fetchall()
        cursor.close()
        # Commit the changes
        db.commit()
    finally:
        db.close()
    return rows
59
+
60
+
61
# Function to fetch all transactions from the transactions table
def fetch_all_transactions():
    """Return every row of the transactions table as a DataFrame."""
    connection, _cursor = get_connection()
    transactions = pd.read_sql_query(f"SELECT * FROM {DB_NAME}", connection)
    connection.close()
    return transactions
68
+
69
+
70
def delete_all_transactions():
    """Delete every row from the transactions table; the table itself is kept."""
    delete_statement = f"DELETE FROM {DB_NAME}"
    execute_return_query(delete_statement)


# Running this module as a script empties the transactions table.
if __name__ == "__main__":
    delete_all_transactions()