spanmandewit committed · Commit 82debdf · Parent(s): ab3e833

initial commit
Browse files:
- .gitignore +157 -0
- db_utils/__init__.py +0 -0
- db_utils/generate_db.py +121 -0
- db_utils/summarize_db.py +111 -0
- ffi_reports/__init__.py +0 -0
- ffi_reports/dashboard.py +185 -0
- ffi_reports/generate_reports.py +411 -0
- main.py +8 -0
- pytest.ini +2 -0
- requirements.txt +7 -0
- tests/__init__.py +0 -0
- tests/test_generate_reports.py +41 -0
.gitignore
ADDED
@@ -0,0 +1,157 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+
+
+# project specific
+data/
+*.pdf
db_utils/__init__.py
ADDED
File without changes
db_utils/generate_db.py
ADDED
@@ -0,0 +1,121 @@
+"""
+Code to generate the sqlite3 database.
+
+Adjusted a few things from the original code to make it work with this case:
+- Changed the method to use parameters instead of directly parsing arguments
+"""
+import os
+import argparse
+import datetime
+import pandas as pd
+import random
+import sqlite3 as sl
+import sys
+
+
+class TxnGenerator():
+    """
+    Generate a given number of users and transactions. Transaction properties are randomly determined.
+    At the same time as the internal details are generated, a third-party record is generated to represent
+    the third-party reconciliation records.
+    """
+
+    def __init__(self, db_conn):
+        self.db_conn = db_conn
+
+    def generate(self, num_users, num_txns):
+        # Generate num_users users.
+        user_ids = []
+        user_types = []
+        for i in range(num_users):
+            user_ids.append(i)
+            user_types.append(random.choices([0, 1], weights=[3, 1])[0])
+        df_users = pd.DataFrame({"id": user_ids, "type": user_types}).set_index("id")
+        df_users.to_sql("users", self.db_conn)
+
+        # Generate num_txns transactions, starting from the beginning of 2022.
+        txn_id = 0
+        timestamp = datetime.datetime(2022, 1, 1, 0, 0, 0)
+        txn_ids = []
+        txn_created_at = []
+        txn_products = []
+        txn_amounts = []
+        txn_statuses = []
+        txn_user_ids = []
+        txn_completed_at = []
+        tp_ids = []
+        tp_amounts = []
+        tp_timestamps = []
+        tp_products = []
+        while txn_id < num_txns:
+            # Every ten seconds, 50/50 create a transaction or do nothing.
+            action = random.choices(["skip", "txn"], weights=[1, 1])[0]
+            if action == "txn":
+                txn_ids.append(txn_id)
+                txn_created_at.append(timestamp)
+                txn_products.append(random.choices(["ProdA", "ProdB"], weights=[3, 5])[0])
+                txn_amounts.append(max(0.01, round(random.normalvariate(20, 10), 2)))
+                # Most txns succeed, some fail, a few get stuck in PENDING.
+                txn_statuses.append(random.choices(["SUCCESS", "FAILURE", "PENDING"], weights=[1000, 20, 2])[0])
+                txn_user_ids.append(random.choice(range(num_users)))
+                # Transactions normally settle within a few seconds, but can take longer. Cap at 3 days.
+                txn_completed_at.append(
+                    timestamp + datetime.timedelta(seconds=min(random.lognormvariate(3, 10), 72 * 60 * 60)))
+                # Rarely the third parties just don't record the transaction.
+                tp_action = random.choices(["skip", "txn"], weights=[1, 2000])[0]
+                if tp_action == "txn" and txn_statuses[-1] != "FAILURE":
+                    # Note failed transactions are not recorded. Some transactions are reported in recon, but
+                    # FFI thinks they are stuck pending.
+                    tp_ids.append(txn_id)
+                    # Rarely the third parties are out by a penny/cent.
+                    tp_amounts.append(txn_amounts[-1] + random.choices([-0.01, 0, 0.01], weights=[1, 2000, 1])[0])
+                    # Third-party settlement timestamps are usually around when FFI thinks the txn completed. But with some variance.
+                    tp_timestamps.append(
+                        txn_completed_at[-1] + datetime.timedelta(seconds=random.normalvariate(0, 180)))
+                    tp_products.append(txn_products[-1])
+                txn_id += 1
+            timestamp += datetime.timedelta(seconds=10)
+
+        # Write the FFI transaction records to the DB.
+        df_txns = pd.DataFrame(
+            {"id": txn_ids, "created_at": txn_created_at, "product": txn_products, "amount": txn_amounts,
+             "status": txn_statuses, "user_id": txn_user_ids, "completed_at": txn_completed_at}).set_index("id")
+        df_txns.to_sql("transactions", self.db_conn)
+
+        # Split the third-party recon records by product, then write to their respective DB tables.
+        df_tp_txns = pd.DataFrame(
+            {"id": tp_ids, "amount": tp_amounts, "timestamp": tp_timestamps, "product": tp_products}).set_index("id")
+        df_tpa_txns = df_tp_txns[df_tp_txns["product"] == "ProdA"].drop(columns=["product"])
+        df_tpb_txns = df_tp_txns[df_tp_txns["product"] == "ProdB"].drop(columns=["product"])
+        df_tpa_txns.to_sql("tpa_recon", self.db_conn)
+        df_tpb_txns.to_sql("tpb_recon", self.db_conn)
+
+    def report(self):
+        # Query the transactions table to give some overall stats.
+        report_df = pd.read_sql(
+            "SELECT MIN(created_at) AS start, MAX(created_at) AS end, COUNT(*) AS count FROM transactions",
+            self.db_conn)
+        print(
+            f"Generated {report_df.at[0, 'count']} transactions from {report_df.at[0, 'start']} to {report_df.at[0, 'end']}.")
+
+
+def main(num_users, num_txns):
+    data_dir = "./data"
+    filename = "generated_txns.db"
+    os.makedirs(data_dir, exist_ok=True)
+
+    db_conn = sl.connect(os.path.join(data_dir, filename))
+    gen = TxnGenerator(db_conn)
+    gen.generate(num_users, num_txns)
+    gen.report()
+
+
+if __name__ == "__main__":
+    random.seed(2022)  # Aid repeatability
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("num_users", type=int, help="Number of users to generate")
+    parser.add_argument("num_txns", type=int, help="Number of transactions to generate")
+    args = parser.parse_args()
+
+    sys.exit(main(args.num_users, args.num_txns))
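For quick experimentation, a minimal sketch of driving the generator from Python instead of the CLI (assuming the repo root is the working directory and that ./data/generated_txns.db does not exist yet, since pandas.DataFrame.to_sql raises if the target tables are already present):

import random
from db_utils.generate_db import main

random.seed(2022)  # same seed the script itself uses, for repeatability
main(num_users=100, num_txns=10_000)  # writes ./data/generated_txns.db and prints a short summary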
db_utils/summarize_db.py
ADDED
@@ -0,0 +1,111 @@
+"""
+Code to quickly explore the sqlite3 database.
+"""
+
+import argparse
+import os
+import datetime
+import pandas as pd
+import random
+import sqlite3 as sl
+import sys
+
+
+def print_sep():
+    print('------------------------------------------')
+
+
+class DbSummarizer():
+    """
+    Summarizes the sqlite3 database by printing:
+    1. Tables
+    2. Number of rows in each table
+    """
+
+    def __init__(self, path: str = "./data/generated_txns.db"):
+        """
+        Initialize the DbSummarizer object.
+
+        Args:
+            path (str): Path to the sqlite3 database.
+        """
+        self.path = path
+        self.db_conn = None
+        self.db_cursor = None
+
+    def connect(self):
+        """
+        Connect to the sqlite3 database.
+        """
+        self.db_conn = sl.connect(self.path)
+        self.db_cursor = self.db_conn.cursor()
+
+    def disconnect(self):
+        """
+        Disconnect from the sqlite3 database.
+        """
+        self.db_conn.close()
+
+    def get_table_names(self):
+        """
+        Get the names of the tables in the sqlite3 database.
+        """
+        self.db_cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
+        table_names = self.db_cursor.fetchall()
+        return table_names
+
+    def summarize(self):
+        """
+        Summarize the sqlite3 database by printing:
+        1. Number of tables
+        2. Table names
+        3. Number of rows in each table
+        4. Column names and first 10 rows in each table
+        """
+        table_names = self.get_table_names()
+        print(f"This db has {len(table_names)} tables.")
+        print(f"Table names: {table_names}")
+        for table_name in table_names:
+            table_name_str = table_name[0]
+            print_sep()
+            print(f"Table name: {table_name_str}")
+            print_sep()
+
+            # Get nr of rows
+            self.db_cursor.execute(f"SELECT COUNT(*) FROM {table_name[0]};")
+            nr_rows = self.db_cursor.fetchone()[0]
+            print(f"Number of rows: {nr_rows}")
+            print_sep()
+
+            # Get column names and first 10 rows
+            self.db_cursor.execute(f"SELECT * FROM {table_name[0]} LIMIT 10;")
+            column_names = [description[0] for description in self.db_cursor.description]
+            print(f"Column names: {column_names}")
+            first_10_rows = self.db_cursor.fetchall()
+            df_first_10_rows = pd.DataFrame(first_10_rows, columns=column_names)
+            tables_dir = "./data/tables"
+            os.makedirs(tables_dir, exist_ok=True)
+            output_path = f"./data/tables/{table_name_str}_first_10_rows.csv"
+            df_first_10_rows.to_csv(output_path, index=False)
+            print("First 10 rows:")
+            print(df_first_10_rows)
+            print_sep()
+            print()
+
+    def __call__(self):
+        """
+        Call the class: connect to the db, summarize the sqlite3 database, and disconnect.
+        """
+        self.connect()
+        self.summarize()
+        self.disconnect()
+
+
+if __name__ == "__main__":
+    print(os.getcwd())
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--path", type=str, default="./data/generated_txns.db")
+    args = parser.parse_args()
+
+    db_summarizer = DbSummarizer(path=args.path)
+    db_summarizer()
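The summarizer can equally be driven from Python; a small sketch, assuming the database has already been generated at the default path:

from db_utils.summarize_db import DbSummarizer

summarizer = DbSummarizer(path="./data/generated_txns.db")
summarizer()  # prints table names, row counts, and first rows; writes CSV previews under ./data/tables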
ffi_reports/__init__.py
ADDED
File without changes
ffi_reports/dashboard.py
ADDED
@@ -0,0 +1,185 @@
+"""
+Code to run the Streamlit dashboard.
+"""
+
+import glob
+from typing import Optional
+import os
+import time
+import datetime
+
+import streamlit as st
+import pandas as pd
+import plotly.graph_objects as go
+
+from ffi_reports.generate_reports import generate_finance_reports
+from db_utils import generate_db
+
+
+class ReportsHelper:
+    DEFAULT_DB_PATH = "./data/generated_txns.db"
+
+    AVAILABLE_PRODUCTS = [
+        "ProdA",
+        "ProdB"
+    ]
+
+    DEFAULT_GENERAL_REPORTS = [
+        "management_summary"
+    ]
+
+    DEFAULT_PRODUCT_REPORTS = [
+        "summary_report",
+        "exception_report_missing_transactions",
+        "exception_report_missing_recon",
+        "exception_report_amount_mismatch"
+    ]
+
+    def __init__(self, reports_dir: Optional[str] = "./data/reports"):
+        """
+        Initialize the ReportsHelper object, which helps find the reports.
+
+        Args:
+            reports_dir (str, optional): The directory where the reports are stored.
+        """
+        self.reports_dir = reports_dir
+        self.product_reports = ReportsHelper.DEFAULT_PRODUCT_REPORTS
+        self.general_reports = ReportsHelper.DEFAULT_GENERAL_REPORTS
+        self.available_products = ReportsHelper.AVAILABLE_PRODUCTS
+        self.db_path = ReportsHelper.DEFAULT_DB_PATH
+
+        if not os.path.exists(self.db_path):
+            load_str = f"Generating database at: {self.db_path} " \
+                       f"since this is the first run. This may take a while..."
+            print(load_str)
+            with st.spinner(load_str):
+                generate_db.main(1000, 1000000)
+        else:
+            print(f"Database found at: {self.db_path}")
+
+    def get_report_filepath(self, report_type: str, product_name: Optional[str] = None,
+                            reports_dir: Optional[str] = "./data/reports") -> str:
+        """
+        Get the filepath for a report.
+
+        Args:
+            report_type (str): The type of report to get the filepath for. One of:
+                - management_summary
+                - summary_report
+                - exception_report_missing_transactions
+                - exception_report_missing_recon
+                - exception_report_amount_mismatch
+            product_name (str): The name of the product to get the report for. Required for product reports.
+            reports_dir (str, optional): The directory where the reports are stored.
+
+        Returns:
+            str: The filepath for the requested report.
+        """
+        report_type = report_type.lower()
+        if report_type in self.product_reports and product_name is None:
+            raise ValueError("'product_name' must be provided for product reports.")
+
+        if report_type == 'management_summary':
+            # Pick the most recently created dated management summary, if one exists.
+            management_summaries = glob.glob(os.path.join(reports_dir, "management_summary_*.csv"))
+            if len(management_summaries) == 0:
+                report_file_name = report_type + ".csv"
+            else:
+                report_file_name = os.path.basename(max(management_summaries, key=os.path.getctime))
+        elif report_type in self.product_reports:
+            report_file_name = f"{product_name}_{report_type}.csv"
+        else:
+            raise ValueError(f"Invalid 'report_type': {report_type}")
+
+        return os.path.join(reports_dir, report_file_name)
+
+
+def mock_progress_bar(max_seconds=15, max_steps=100):
+    progress_bar = st.progress(0)
+    seconds_per_step = max_seconds / max_steps
+    for i in range(max_steps):
+        # Perform some work here
+        time.sleep(seconds_per_step)
+        progress_bar.progress((i + 1) / max_steps)
+
+
+# Streamlit app
+def run_app():
+    st.set_page_config(layout="wide")
+
+    # Set the title and page layout for the report dashboard
+    st.title("Report Dashboard")
+    st.sidebar.title(":gear: Options")
+
+    # Sidebar controls to select the start date and end date
+    st.sidebar.markdown("## Select Date Range")
+    start_date = st.sidebar.date_input("Start Date", datetime.date(2022, 1, 1))
+    end_date = st.sidebar.date_input("End Date", datetime.date.today())
+    # Button to generate reports
+    if st.sidebar.button("(Re)generate Reports"):
+        # Generate reports based on start and end date
+        with st.spinner("Generating reports..."):
+            start_date_str = start_date.strftime('%Y-%m-%d')
+            end_date_str = end_date.strftime('%Y-%m-%d')
+            generate_finance_reports(start_date_str, end_date_str)
+        st.success("Reports generated successfully!")
+
+    st.sidebar.markdown("## Choose the report type and product")
+
+    reports_helper = ReportsHelper()
+
+    # Get the report type and product selection from the sidebar
+    report_type_formatted = st.sidebar.selectbox("Report Type",
+                                                 reports_helper.general_reports +
+                                                 reports_helper.product_reports)
+
+    report_type = report_type_formatted.lower().replace(" ", "_")
+    product_name = None
+    if report_type in reports_helper.product_reports:
+        product_name = st.sidebar.selectbox("Product", reports_helper.available_products)
+
+    # Display the original table (csv) on the left side of the screen
+    col1, col2 = st.columns([1, 1])
+
+    with col1:
+        st.subheader("Table Explorer")
+        filepath = os.path.abspath(reports_helper.get_report_filepath(report_type, product_name))
+        if not os.path.exists(filepath):
+            st.write(f"Report not found: {filepath}")
+        else:
+            df = pd.read_csv(reports_helper.get_report_filepath(report_type, product_name), decimal=".")
+            st.write(df)
+
+    with col2:
+        if not os.path.exists(filepath):
+            st.write(f"Report not found: {filepath}")
+        else:
+            # Calculate the sum of all number columns, excluding ID and type columns
+            number_columns = df.select_dtypes(include=["int", "float"]).columns
+            number_columns = [col for col in number_columns if
+                              not col.lower().endswith("_id") and not col.lower().endswith("_type")]
+            sum_values = df[number_columns].sum()
+
+            if report_type == "management_summary":
+                # Display the number columns as a line chart over the dates
+                st.subheader("Line Chart")
+                line_columns = st.multiselect("Select columns for Line Chart", number_columns, default=number_columns)
+
+                if line_columns:
+                    line_fig = go.Figure()
+                    for column in line_columns:
+                        line_fig.add_trace(go.Line(x=df["Date"], y=df[column], name=column))
+
+                    line_fig.update_layout(hovermode="x unified")  # Set the hover mode
+                    st.plotly_chart(line_fig)
+
+            # Display the sum of number columns as a bar chart
+            st.subheader("Totals")
+            fig = go.Figure()
+            fig.add_trace(go.Bar(x=sum_values.index, y=sum_values.values, name="", showlegend=False))
+            fig.update_layout(xaxis_title="Column", yaxis_title="Sum")
+            st.plotly_chart(fig)
+
+
+if __name__ == "__main__":
+    run_app()
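A brief sketch of the path-resolution logic in isolation, assuming ./data/generated_txns.db already exists so that constructing the helper skips the slow generation branch:

from ffi_reports.dashboard import ReportsHelper

helper = ReportsHelper()
print(helper.get_report_filepath("summary_report", product_name="ProdA"))
# ./data/reports/ProdA_summary_report.csv
print(helper.get_report_filepath("management_summary"))
# the newest management_summary_*.csv if one exists, else ./data/reports/management_summary.csv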
ffi_reports/generate_reports.py
ADDED
@@ -0,0 +1,411 @@
+"""
+Code to generate reports from the sqlite3 database.
+"""
+
+from abc import ABC, abstractmethod
+from typing import Optional
+import os
+import datetime
+import sqlite3 as sl
+
+import sqlparse
+import pandas as pd
+from loguru import logger as lg
+
+
+class Report(ABC):
+
+    def __init__(self, db_path: str, report_name: str, product_name: Optional[str] = None):
+        self.df = None
+        self.db_path = db_path
+        self.db_conn = None
+        self.db_cursor = None
+
+        self.product_name = product_name
+        self.transactions_table_name = "transactions"
+        self.users_table_name = "users"
+        self.third_party_table_name = self.__get_third_party_table_name()
+
+        if self.product_name:
+            self.report_name = f"{product_name}_{report_name}"
+        else:
+            self.report_name = report_name
+
+        self.report_path = f"./data/reports/{self.report_name}.csv"
+        os.makedirs(os.path.dirname(self.report_path), exist_ok=True)
+
+        # Check that the required tables exist
+        tables = [self.transactions_table_name, self.users_table_name, self.third_party_table_name]
+        for table in tables:
+            if table is not None:
+                self.__check_table_existence(table)
+
+    def __get_third_party_table_name(self):
+        if self.product_name:
+            if not self.product_name.startswith("Prod"):
+                raise ValueError(f"Product name {self.product_name} does not start with 'Prod'.")
+            product_letter = self.product_name.replace("Prod", "").lower()
+            third_party_table_name = f"tp{product_letter}_recon"
+        else:
+            third_party_table_name = None
+        return third_party_table_name
+
+    def __check_table_existence(self, table_name):
+        self.connect()
+        self.db_cursor.execute(
+            f"SELECT name FROM sqlite_master WHERE type='table' AND name='{table_name}';")
+        table_names = self.db_cursor.fetchall()
+        if len(table_names) == 0:
+            raise ValueError(
+                f"Table {table_name} does not exist")
+        self.disconnect()
+        return True
+
+    def __write_sql_string(self, sql_string):
+        # Format before writing
+        sql_string = sqlparse.format(sql_string, reindent=True, keyword_case="upper")
+
+        output_path = f"./data/sql_scripts/{self.report_name}.sql"
+        os.makedirs(os.path.dirname(output_path), exist_ok=True)
+        with open(output_path, "w+") as f:
+            f.write(sql_string)
+        lg.debug(f"SQL string written to: {output_path}")
+
+    def connect(self):
+        """
+        Connect to the sqlite3 database.
+        """
+        self.db_conn = sl.connect(self.db_path)
+        self.db_cursor = self.db_conn.cursor()
+
+    def disconnect(self):
+        """
+        Disconnect from the sqlite3 database.
+        """
+        self.db_conn.close()
+
+    def generate(self):
+        if self.db_conn is None or self.db_cursor is None:
+            raise ValueError("Database connection not established. Call connect() first.")
+
+        sql_string = self.generate_sql_string()
+        self.__write_sql_string(sql_string)
+
+        # Execute and save as df
+        self.db_cursor.execute(sql_string)
+        columns = [description[0] for description in self.db_cursor.description]
+        result = self.db_cursor.fetchall()
+        self.df = pd.DataFrame(result, columns=columns)
+
+    def to_csv(self):
+        if self.df is None:
+            raise ValueError("Report has not been generated yet. Call generate() first.")
+
+        # Format number columns with 2 decimal precision
+        number_columns = [column for column in self.df.columns if self.df[column].dtype in [int, float]]
+        self.df[number_columns] = self.df[number_columns].round(2)
+        self.df.to_csv(self.report_path, index=False, header=self.df.columns)
+        lg.debug(f"Report generated at: {self.report_path}")
+
+    @abstractmethod
+    def generate_sql_string(self):
+        pass
+
+    def __call__(self):
+        self.connect()
+        self.generate()
+        self.to_csv()
+        self.disconnect()
+
+
+class ProductSummaryReport(Report):
+    """
+    Generates a summary report for successful product transactions, with one line per day for the given date range.
+
+    Args:
+        product_name (str): The name of the product.
+        db_path (str): The path to the database.
+        start_date (str, optional): The start date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
+        end_date (str, optional): The end date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
+    """
+
+    def __init__(self, product_name: str, db_path: str, start_date: Optional[str] = None,
+                 end_date: Optional[str] = None):
+        report_name = "summary_report"
+        super().__init__(db_path, report_name, product_name=product_name)
+
+        self.start_date = start_date
+        self.end_date = end_date
+
+    def generate_sql_string(self) -> str:
+        """
+        Generates the SQL string for the summary report.
+
+        Returns:
+            str: The SQL string for the summary report.
+        """
+        sql_string = f"""
+        SELECT
+            DATE(t.completed_at) AS Date,
+            SUM(CASE WHEN u.type = 0 AND t.product = '{self.product_name}' THEN t.amount ELSE 0 END) AS Total_Type0_Amount,
+            SUM(CASE WHEN u.type = 1 AND t.product = '{self.product_name}' THEN t.amount ELSE 0 END) AS Total_Type1_Amount,
+            SUM(CASE WHEN t.product = '{self.product_name}' THEN t.amount ELSE 0 END) AS Total_Amount,
+            SUM(CASE WHEN t.product = '{self.product_name}' THEN tp.amount ELSE 0 END) AS Total_TP_Amount
+        FROM
+            {self.transactions_table_name} t
+        JOIN
+            {self.users_table_name} u ON t.user_id = u.id
+        JOIN
+            {self.third_party_table_name} tp ON t.id = tp.id
+        WHERE t.status = 'SUCCESS'
+        """
+
+        if self.start_date and self.end_date:
+            sql_string += f"AND t.completed_at >= '{self.start_date}' AND t.completed_at <= '{self.end_date}'\n"
+
+        sql_string += "GROUP BY Date"
+
+        return sql_string
+
+
+class ExceptionReportMissingRecon(Report):
+    """
+    Generates an exception report listing any transactions in FFI's DB that
+    are not present in the third-party recon table for the given date range.
+
+    Args:
+        product_name (str): The name of the product.
+        db_path (str): The path to the database.
+        start_date (str, optional): The start date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
+        end_date (str, optional): The end date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
+    """
+
+    def __init__(self, product_name: str, db_path: str, start_date: Optional[str] = None,
+                 end_date: Optional[str] = None):
+        report_name = "exception_report_missing_recon"
+        super().__init__(db_path, report_name, product_name=product_name)
+
+        self.start_date = start_date
+        self.end_date = end_date
+
+    def generate_sql_string(self) -> str:
+        """
+        Generates the SQL string for the exception report.
+
+        Returns:
+            str: The SQL string for the exception report.
+        """
+        sql_string = f"""
+        SELECT
+            t.completed_at AS Completed_At,
+            t.id AS Transaction_ID,
+            t.amount AS Amount,
+            t.user_id AS User_ID,
+            u.type AS User_Type,
+            t.status AS Status
+        FROM
+            {self.transactions_table_name} t
+        LEFT JOIN
+            {self.users_table_name} u ON t.user_id = u.id
+        LEFT JOIN
+            {self.third_party_table_name} tp ON t.id = tp.id
+        WHERE
+            t.product = '{self.product_name}' AND tp.id IS NULL
+        """
+
+        if self.start_date and self.end_date:
+            sql_string += f" AND t.completed_at >= '{self.start_date}' AND t.completed_at <= '{self.end_date}'\n"
+
+        return sql_string
+
+
+class ExceptionReportMissingTransactions(Report):
+    """
+    Generates an exception report listing any transactions in the third-party recon table
+    that are not present in FFI's DB for the given date range.
+
+    Args:
+        product_name (str): The name of the product.
+        db_path (str): The path to the database.
+        start_date (str, optional): The start date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
+        end_date (str, optional): The end date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
+    """
+
+    def __init__(self, product_name: str, db_path: str, start_date: Optional[str] = None,
+                 end_date: Optional[str] = None):
+        report_name = "exception_report_missing_transactions"
+        super().__init__(db_path, report_name, product_name=product_name)
+
+        self.start_date = start_date
+        self.end_date = end_date
+
+    def generate_sql_string(self) -> str:
+        """
+        Generates the SQL string for the exception report.
+
+        Returns:
+            str: The SQL string for the exception report.
+        """
+        sql_string = f"""
+        SELECT
+            tp.timestamp AS Timestamp,
+            tp.id AS Transaction_ID,
+            tp.amount AS Amount
+        FROM
+            {self.third_party_table_name} tp
+        LEFT JOIN
+            {self.transactions_table_name} t ON tp.id = t.id
+        """
+
+        if self.start_date and self.end_date:
+            sql_string += f"WHERE t.id IS NULL AND tp.timestamp >= '{self.start_date}' AND tp.timestamp <= '{self.end_date}'\n"
+        else:
+            sql_string += "WHERE t.id IS NULL\n"
+
+        return sql_string
+
+
+class ExceptionReportAmountMismatch(Report):
+    """
+    Generates an exception report listing any transactions in FFI's DB that are present in the
+    third-party recon table with a different amount, within the given date range.
+
+    Args:
+        db_path (str): The path to the database.
+        product_name (str): The name of the product.
+        start_date (str, optional): The start date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
+        end_date (str, optional): The end date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
+    """
+
+    def __init__(self, product_name: str, db_path: str, start_date: Optional[str] = None,
+                 end_date: Optional[str] = None):
+        report_name = "exception_report_amount_mismatch"
+        super().__init__(db_path, report_name, product_name=product_name)
+
+        self.start_date = start_date
+        self.end_date = end_date
+
+    def generate_sql_string(self) -> str:
+        """
+        Generates the SQL string for the exception report.
+
+        Returns:
+            str: The SQL string for the exception report.
+        """
+        sql_string = f"""
+        SELECT
+            t.completed_at AS Completed_At,
+            t.id AS Transaction_ID,
+            t.amount AS FFI_Amount,
+            tp.amount AS TP_Amount,
+            t.user_id AS User_ID,
+            u.type AS User_Type,
+            t.status AS Status
+        FROM
+            {self.transactions_table_name} t
+        LEFT JOIN
+            {self.users_table_name} u ON t.user_id = u.id
+        LEFT JOIN
+            {self.third_party_table_name} tp ON t.id = tp.id
+        WHERE t.amount <> tp.amount
+        """
+
+        if self.start_date and self.end_date:
+            sql_string += f" AND t.completed_at >= '{self.start_date}' AND t.completed_at <= '{self.end_date}'\n"
+
+        return sql_string
+
+
+class ManagementSummaryReport(Report):
+    """
+    Generates a management summary report for the most recent nr_days (default 30) in the database,
+    providing the daily gross transaction value for ProdA, ProdB, and the total of the two.
+
+    Args:
+        db_path (str): The path to the database.
+        nr_days (int): The number of days to report on, counting back from the latest completion date.
+    """
+
+    def __init__(self, db_path: str, nr_days: int = 30):
+        self.today = datetime.date.today()
+        today_str = self.today.strftime("%Y-%m-%d")
+        report_name = f"management_summary_report_{today_str}"
+        self.nr_days = nr_days
+        super().__init__(db_path, report_name)
+
+    def generate_sql_string(self) -> str:
+        """
+        Generates the SQL string for the management summary report.
+
+        Returns:
+            str: The SQL string for the management summary report.
+        """
+        query = f"SELECT DISTINCT product FROM {self.transactions_table_name};"
+        self.db_cursor.execute(query)
+        result = self.db_cursor.fetchall()
+        unique_products = [row[0] for row in result]
+
+        # Get the maximum completion date in the database, then look back nr_days from there
+        query = f"SELECT MAX(completed_at) FROM {self.transactions_table_name};"
+        self.db_cursor.execute(query)
+        max_completion_date_result = self.db_cursor.fetchone()[0]
+        max_completion_date_result_dt = datetime.datetime.strptime(max_completion_date_result, "%Y-%m-%d %H:%M:%S")
+        start_date = max_completion_date_result_dt - datetime.timedelta(days=self.nr_days)
+        end_date = max_completion_date_result_dt
+
+        sum_columns_string = ",\n".join([
+            f"SUM(CASE WHEN t.product = '{product_name}' THEN t.amount ELSE 0 END) AS {product_name}_Gross_Transaction_Value"
+            for product_name in unique_products])
+
+        sql_string = f"""
+        SELECT
+            DATE(t.completed_at) AS Date,
+            {sum_columns_string},
+            SUM(t.amount) AS Total_Gross_Transaction_Value
+        FROM
+            {self.transactions_table_name} t
+        WHERE
+            t.completed_at >= '{start_date}' AND t.completed_at <= '{end_date}'
+        GROUP BY
+            Date;
+        """
+        return sql_string
+
+
+def generate_finance_reports(start_date: str, end_date: str):
+    """
+    Generates finance reports for the given date range.
+
+    Args:
+        start_date (str): The start date for the date range (format: 'YYYY-MM-DD').
+        end_date (str): The end date for the date range (format: 'YYYY-MM-DD').
+    """
+    lg.add("./logs/generate_reports_log_{time}.log")
+    lg.info("Started generating finance reports...")
+
+    # columns = ['id', 'created_at', 'product', 'amount', 'status', 'user_id', 'completed_at']
+    DB_PATH = "./data/generated_txns.db"
+    PRODUCTS_LIST = ["ProdA", "ProdB"]
+    lg.debug(f"DB_PATH: {DB_PATH}")
+    lg.debug(f"PRODUCTS_LIST: {PRODUCTS_LIST}")
+
+    # Generate summary reports
+    lg.info("Generating summary reports...")
+    for product in PRODUCTS_LIST:
+        ProductSummaryReport(product, DB_PATH, start_date, end_date)()
+
+    # Generate exception reports
+    lg.info("Generating exception reports...")
+    for product in PRODUCTS_LIST:
+        ExceptionReportMissingRecon(product, DB_PATH, start_date, end_date)()
+        ExceptionReportMissingTransactions(product, DB_PATH, start_date, end_date)()
+        ExceptionReportAmountMismatch(product, DB_PATH, start_date, end_date)()
+
+    # Generate management summary report
+    lg.info("Generating management summary report...")
+    ManagementSummaryReport(DB_PATH)()
+
+
+if __name__ == '__main__':
+    default_start_date = "2021-02-01"
+    default_end_date = "2021-05-31"
+    generate_finance_reports(default_start_date, default_end_date)
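The reports are normally produced in bulk via generate_finance_reports, but each report class is usable on its own. A minimal sketch, assuming the generated database exists at the default path:

from ffi_reports.generate_reports import ProductSummaryReport

report = ProductSummaryReport("ProdA", "./data/generated_txns.db",
                              start_date="2022-01-01", end_date="2022-01-31")
report()  # connect, render and log the SQL, then write ./data/reports/ProdA_summary_report.csv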
main.py
ADDED
@@ -0,0 +1,8 @@
+"""
+This is the main file for the FFI Reports Dashboard.
+"""
+
+from ffi_reports import dashboard
+
+if __name__ == '__main__':
+    dashboard.run_app()
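Note that main.py simply delegates to dashboard.run_app(); as with any Streamlit app, the intended entry point is presumably the Streamlit CLI (e.g. streamlit run main.py from the repo root) rather than the plain interpreter.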
pytest.ini
ADDED
@@ -0,0 +1,2 @@
+[pytest]
+testpaths = tests
requirements.txt
ADDED
@@ -0,0 +1,7 @@
+loguru==0.7.0
+pandas==2.0.1
+sqlparse==0.4.4
+matplotlib==3.7.1
+streamlit==1.22.0
+plotly==5.14.1
+pytest==7.3.1
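The pins install in the usual way (pip install -r requirements.txt); streamlit, plotly, pandas, sqlparse, and loguru are all imported by the modules above, while pytest backs the test suite.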
tests/__init__.py
ADDED
File without changes
tests/test_generate_reports.py
ADDED
@@ -0,0 +1,41 @@
+"""
+This module contains tests for the generate_reports module.
+Only one TestClass has been implemented for illustration purposes.
+"""
+
+import pytest
+import pandas as pd
+from datetime import datetime
+from ffi_reports.generate_reports import ProductSummaryReport
+
+
+class TestProductSummaryReport:
+    DEFAULT_DB_PATH = "./data/generated_txns.db"
+
+    @pytest.fixture(scope="class")
+    def summary_report(self):
+        start_date = "2023-03-01"
+        end_date = "2023-05-31"
+        report = ProductSummaryReport("ProdA", self.DEFAULT_DB_PATH, start_date=start_date, end_date=end_date)
+        report.connect()
+        report.generate()
+        yield report
+        report.disconnect()
+
+    def test_minimum_date_above_start_date(self, summary_report):
+        df = summary_report.df
+        min_date = df["Date"].min()
+        assert datetime.strptime(min_date, "%Y-%m-%d").date() >= datetime.strptime(summary_report.start_date,
+                                                                                   "%Y-%m-%d").date()
+
+    def test_maximum_date_below_end_date(self, summary_report):
+        df = summary_report.df
+        max_date = df["Date"].max()
+        assert datetime.strptime(max_date, "%Y-%m-%d").date() <= datetime.strptime(summary_report.end_date,
+                                                                                   "%Y-%m-%d").date()
+
+    def test_summary_report_generation(self, summary_report):
+        df = summary_report.df
+        assert isinstance(df, pd.DataFrame)
+        assert len(df) > 0
+        assert all(column in df.columns for column in
+                   ["Date", "Total_Type0_Amount", "Total_Type1_Amount", "Total_Amount", "Total_TP_Amount"])
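One practical caveat: the fixture reads from ./data/generated_txns.db, so the suite assumes the database has already been generated (for example via db_utils/generate_db.py). With pytest.ini pointing testpaths at tests/, a bare pytest from the repo root collects and runs this class.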