spanmandewit commited on
Commit
82debdf
·
1 Parent(s): ab3e833

initial commit

Browse files
.gitignore ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
105
+ __pypackages__/
106
+
107
+ # Celery stuff
108
+ celerybeat-schedule
109
+ celerybeat.pid
110
+
111
+ # SageMath parsed files
112
+ *.sage.py
113
+
114
+ # Environments
115
+ .env
116
+ .venv
117
+ env/
118
+ venv/
119
+ ENV/
120
+ env.bak/
121
+ venv.bak/
122
+
123
+ # Spyder project settings
124
+ .spyderproject
125
+ .spyproject
126
+
127
+ # Rope project settings
128
+ .ropeproject
129
+
130
+ # mkdocs documentation
131
+ /site
132
+
133
+ # mypy
134
+ .mypy_cache/
135
+ .dmypy.json
136
+ dmypy.json
137
+
138
+ # Pyre type checker
139
+ .pyre/
140
+
141
+ # pytype static type analyzer
142
+ .pytype/
143
+
144
+ # Cython debug symbols
145
+ cython_debug/
146
+
147
+ # PyCharm
148
+ # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can
149
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
150
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
151
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
152
+ .idea/
153
+
154
+
155
+ # project specific
156
+ data/
157
+ *.pdf
db_utils/__init__.py ADDED
File without changes
db_utils/generate_db.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Code to generate the sqlite3 database.
3
+
4
+ Adjusted a few things from the original code to make it work with this case:
5
+ - Changed the method to use parameters instead of directly parsing arguments
6
+ """
7
+ import os
8
+ import argparse
9
+ import datetime
10
+ import pandas as pd
11
+ import random
12
+ import sqlite3 as sl
13
+ import sys
14
+
15
+
16
class TxnGenerator:
    """
    Generate a given number of users and transactions with randomly determined
    properties and write them to the sqlite3 database.

    Alongside the internal (FFI) transaction records, third-party recon records
    are generated into per-product tables (tpa_recon / tpb_recon) with
    occasional deliberate discrepancies — missing rows, amounts off by a cent,
    settlement-timestamp variance — to simulate reconciliation issues.
    """

    def __init__(self, db_conn):
        """
        Args:
            db_conn: an open sqlite3 connection the generated tables are written to.
        """
        self.db_conn = db_conn

    def generate(self, num_users, num_txns):
        """
        Generate `num_users` users and exactly `num_txns` transactions and
        write them (plus the recon tables) to the database.

        Args:
            num_users (int): number of user rows to create.
            num_txns (int): number of transaction rows to create.
        """
        # Generate num_users users; roughly a 3:1 split between type 0 and type 1.
        user_ids = []
        user_types = []
        for i in range(num_users):
            user_ids.append(i)
            user_types.append(random.choices([0, 1], weights=[3, 1])[0])
        df_users = pd.DataFrame({"id": user_ids, "type": user_types}).set_index("id")
        # if_exists="replace" makes regeneration idempotent instead of raising
        # ValueError when the table already exists.
        df_users.to_sql("users", self.db_conn, if_exists="replace")

        # Generate num_txns transactions, starting from the beginning of 2022.
        txn_id = 0
        timestamp = datetime.datetime(2022, 1, 1, 0, 0, 0)
        txn_ids = []
        txn_created_at = []
        txn_products = []
        txn_amounts = []
        txn_statuses = []
        txn_user_ids = []
        txn_completed_at = []
        tp_ids = []
        tp_amounts = []
        tp_timestamps = []
        tp_products = []
        while txn_id < num_txns:
            # Every ten seconds, 50/50 create a transaction or do nothing.
            action = random.choices(["skip", "txn"], weights=[1, 1])[0]
            if action == "txn":
                txn_ids.append(txn_id)
                txn_created_at.append(timestamp)
                txn_products.append(random.choices(["ProdA", "ProdB"], weights=[3, 5])[0])
                # Amounts are roughly N(20, 10), clamped at a minimum of 0.01.
                txn_amounts.append(max(0.01, round(random.normalvariate(20, 10), 2)))
                # Most txns succeed, some fail, a few get stuck in PENDING.
                txn_statuses.append(random.choices(["SUCCESS", "FAILURE", "PENDING"], weights=[1000, 20, 2])[0])
                txn_user_ids.append(random.randrange(num_users))
                # Transactions normally settle within a few seconds, but can take longer. Cap at 3 days.
                txn_completed_at.append(
                    timestamp + datetime.timedelta(seconds=min(random.lognormvariate(3, 10), 72 * 60 * 60)))
                # Rarely the third parties just don't record the transaction.
                tp_action = random.choices(["skip", "txn"], weights=[1, 2000])[0]
                if tp_action == "txn" and txn_statuses[-1] != "FAILURE":
                    # Note failed transactions are not recorded. Some transactions are
                    # reported in recon, but FFI think are stuck pending.
                    tp_ids.append(txn_id)
                    # Rarely the third parties are out by a penny/cent.
                    tp_amounts.append(txn_amounts[-1] + random.choices([-0.01, 0, 0.01], weights=[1, 2000, 1])[0])
                    # Third party settlement timestamps are usually around when FFI
                    # thinks the txn completed, but with some variance.
                    tp_timestamps.append(
                        txn_completed_at[-1] + datetime.timedelta(seconds=random.normalvariate(0, 180)))
                    tp_products.append(txn_products[-1])
                txn_id += 1
            timestamp += datetime.timedelta(seconds=10)

        # Write the FFI transaction records to the DB.
        df_txns = pd.DataFrame(
            {"id": txn_ids, "created_at": txn_created_at, "product": txn_products, "amount": txn_amounts,
             "status": txn_statuses, "user_id": txn_user_ids, "completed_at": txn_completed_at}).set_index("id")
        df_txns.to_sql("transactions", self.db_conn, if_exists="replace")

        # Split the third party recon records by product, then write to their respective DB tables.
        df_tp_txns = pd.DataFrame(
            {"id": tp_ids, "amount": tp_amounts, "timestamp": tp_timestamps, "product": tp_products}).set_index("id")
        df_tpa_txns = df_tp_txns[df_tp_txns["product"] == "ProdA"].drop(columns=["product"])
        df_tpb_txns = df_tp_txns[df_tp_txns["product"] == "ProdB"].drop(columns=["product"])
        df_tpa_txns.to_sql("tpa_recon", self.db_conn, if_exists="replace")
        df_tpb_txns.to_sql("tpb_recon", self.db_conn, if_exists="replace")

    def report(self):
        """Print overall stats (row count and created_at range) for the transactions table."""
        report_df = pd.read_sql(
            "SELECT MIN(created_at) AS start, MAX(created_at) AS end, COUNT(*) AS count FROM transactions",
            self.db_conn)
        print(
            f"Generated {report_df.at[0, 'count']} transactions from {report_df.at[0, 'start']} to {report_df.at[0, 'end']}.")
101
+
102
def main(num_users, num_txns, data_dir="./data", filename="generated_txns.db"):
    """
    Create the sqlite3 database file and populate it with generated data.

    Args:
        num_users (int): number of users to generate.
        num_txns (int): number of transactions to generate.
        data_dir (str): directory the database file is created in
            (generalized from the previous hard-coded "./data"; the default
            preserves the old behavior).
        filename (str): name of the database file inside `data_dir`.
    """
    os.makedirs(data_dir, exist_ok=True)

    db_conn = sl.connect(os.path.join(data_dir, filename))
    gen = TxnGenerator(db_conn)
    gen.generate(num_users, num_txns)
    gen.report()
112
+
113
if __name__ == "__main__":
    # Seed the RNG so repeated runs produce identical synthetic data.
    random.seed(2022)  # Aid repeatability

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("num_users", type=int, help="Number of users to generate")
    arg_parser.add_argument("num_txns", type=int, help="Number of transactions to generate")
    parsed = arg_parser.parse_args()

    sys.exit(main(parsed.num_users, parsed.num_txns))
db_utils/summarize_db.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Code to quickly explore the sqlite3 database.
3
+ """
4
+
5
+ import argparse
6
+ import os
7
+ import datetime
8
+ import pandas as pd
9
+ import random
10
+ import sqlite3 as sl
11
+ import sys
12
+
13
+
14
def print_sep():
    """Print a horizontal separator line to stdout."""
    separator = '------------------------------------------'
    print(separator)
18
class DbSummarizer:
    """
    Summarizes an sqlite3 database by printing:
    1. Number of tables and their names
    2. Number of rows in each table
    3. Column names and the first 10 rows of each table

    Each table preview is also written as a CSV under ./data/tables.
    """

    def __init__(self, path: str = "./data/generated_txns.db"):
        """
        Initialize the DbSummarizer object.

        Args:
            path (str): Path to the sqlite3 database.
        """
        self.path = path
        self.db_conn = None
        self.db_cursor = None

    def connect(self):
        """Open the sqlite3 connection and create a cursor."""
        self.db_conn = sl.connect(self.path)
        self.db_cursor = self.db_conn.cursor()

    def disconnect(self):
        """Close the sqlite3 connection and drop the handles."""
        self.db_conn.close()
        # Reset so a stale (closed) connection is never mistaken for a live one.
        self.db_conn = None
        self.db_cursor = None

    def get_table_names(self):
        """Return the table names as a list of 1-tuples (raw sqlite fetchall rows)."""
        self.db_cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        return self.db_cursor.fetchall()

    def summarize(self):
        """
        Summarize the sqlite3 database: print table names, row counts, and a
        first-10-rows preview per table (also saved as CSV under ./data/tables).
        """
        table_names = self.get_table_names()
        print(f"This db has {len(table_names)} tables.")
        print(f"Table names: {table_names}")
        for table_name in table_names:
            table_name_str = table_name[0]
            print_sep()
            print(f"Table name: {table_name_str}")
            print_sep()

            # Row count. Table names come from sqlite_master, so interpolation
            # here cannot inject user input.
            self.db_cursor.execute(f"SELECT COUNT(*) FROM {table_name_str};")
            nr_rows = self.db_cursor.fetchone()[0]
            print(f"Number of rows: {nr_rows}")
            print_sep()

            # Column names and first 10 rows.
            self.db_cursor.execute(f"SELECT * FROM {table_name_str} LIMIT 10;")
            column_names = [description[0] for description in self.db_cursor.description]
            print(f"Column names: {column_names}")
            first_10_rows = self.db_cursor.fetchall()
            df_first_10_rows = pd.DataFrame(first_10_rows, columns=column_names)
            tables_dir = "./data/tables"
            os.makedirs(tables_dir, exist_ok=True)
            # Build from tables_dir instead of repeating the directory literal.
            output_path = os.path.join(tables_dir, f"{table_name_str}_first_10_rows.csv")
            df_first_10_rows.to_csv(output_path, index=False)
            print("First 10 rows:")
            print(df_first_10_rows)
            print_sep()
            print()

    def __call__(self):
        """Connect to the db, summarize it, and disconnect."""
        self.connect()
        self.summarize()
        self.disconnect()
103
+
104
if __name__ == "__main__":
    # Show where we are running from, since the default db path is relative.
    print(os.getcwd())
    cli = argparse.ArgumentParser()
    cli.add_argument("--path", type=str, default="./data/generated_txns.db")
    cli_args = cli.parse_args()

    summarizer = DbSummarizer(path=cli_args.path)
    summarizer()
ffi_reports/__init__.py ADDED
File without changes
ffi_reports/dashboard.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Code to run streamlit dashboard
3
+ """
4
+
5
+ import glob
6
+ from typing import Optional
7
+ import os
8
+ import time
9
+ import datetime
10
+
11
+ import streamlit as st
12
+ import pandas as pd
13
+ import plotly.graph_objects as go
14
+
15
+ from ffi_reports.generate_reports import generate_finance_reports
16
+ from db_utils import generate_db
17
+
18
+
19
class ReportsHelper:
    """
    Helper that locates report CSV files and, on first run, generates the
    backing transactions database (which may take a while).
    """

    DEFAULT_DB_PATH = "./data/generated_txns.db"

    AVAILABLE_PRODUCTS = [
        "ProdA",
        "ProdB"
    ]

    DEFAULT_GENERAL_REPORTS = [
        "management_summary"
    ]

    DEFAULT_PRODUCT_REPORTS = [
        "summary_report",
        "exception_report_missing_transactions",
        "exception_report_missing_recon",
        "exception_report_amount_mismatch"
    ]

    def __init__(self, reports_dir: Optional[str] = "./data/reports"):
        """
        Initialize the ReportsHelper object which helps find the reports.

        Args:
            reports_dir (str, optional): The directory where the reports are stored.
        """
        self.reports_dir = reports_dir
        self.product_reports = ReportsHelper.DEFAULT_PRODUCT_REPORTS
        self.general_reports = ReportsHelper.DEFAULT_GENERAL_REPORTS
        self.available_products = ReportsHelper.AVAILABLE_PRODUCTS
        self.db_path = ReportsHelper.DEFAULT_DB_PATH

        if not os.path.exists(self.db_path):
            load_str = f"Generating database at: {self.db_path} " \
                       f"since running for the first time. This may take a while..."
            print(load_str)
            with st.spinner(load_str):
                generate_db.main(1000, 1000000)
        else:
            print(f"Database found at: {self.db_path}")

    def get_report_filepath(self, report_type: str, product_name: Optional[str] = None,
                            reports_dir: Optional[str] = None) -> str:
        """
        Get the filepath for a report.

        Args:
            report_type (str): The type of report to get the filepath for. One of:
                - management_summary
                - summary_report
                - exception_report_missing_transactions
                - exception_report_missing_recon
                - exception_report_amount_mismatch
            product_name (str): The name of the product to get the report for.
                Required for product reports.
            reports_dir (str, optional): The directory where the reports are
                stored. Defaults to the directory given at construction time
                (previously this default was duplicated as a hard-coded literal
                and self.reports_dir was ignored).

        Returns:
            str: path to the report CSV file.

        Raises:
            ValueError: for an unknown report_type, or a product report
                requested without a product_name.
        """
        if reports_dir is None:
            reports_dir = self.reports_dir
        report_type = report_type.lower()
        if report_type in self.product_reports and product_name is None:
            raise ValueError("'product_name' must be provided for product reports.")

        if report_type == 'management_summary':
            # Management summaries are date-stamped; pick the most recently
            # created one, falling back to the bare name when none exist yet.
            management_summaries = glob.glob(os.path.join(reports_dir, "management_summary_*.csv"))
            if len(management_summaries) == 0:
                report_file_name = report_type + ".csv"
            else:
                report_file_name = os.path.basename(max(management_summaries, key=os.path.getctime))
        elif report_type in self.product_reports:
            report_file_name = f"{product_name}_{report_type}.csv"
        else:
            raise ValueError(f"Invalid 'report_type': {report_type}")

        return os.path.join(reports_dir, report_file_name)
96
+
97
def mock_progress_bar(max_seconds=15, max_steps=100):
    """
    Drive a Streamlit progress bar over roughly `max_seconds`, in `max_steps`
    increments.

    Bug fix: the loop previously iterated a hard-coded range(100), ignoring the
    `max_steps` parameter (over- or under-running the bar for any other value).
    """
    progress_bar = st.progress(0)
    seconds_per_step = max_seconds / max_steps
    for step in range(max_steps):
        # Simulated work for this step.
        time.sleep(seconds_per_step)
        # Scale completed steps to the 0-100 range; identical to the old
        # behavior (1..100) when max_steps is the default 100.
        progress_bar.progress(int((step + 1) * 100 / max_steps))
105
+
106
# Streamlit app
def run_app():
    """Render the report dashboard: sidebar controls, report table, and charts."""
    st.set_page_config(layout="wide")

    # Set the title and page layout for the report dashboard
    st.title("Report Dashboard")
    st.sidebar.title(" :gear:️ Options")

    st.sidebar.markdown("## Select Date Range")
    start_date = st.sidebar.date_input("Start Date", datetime.date(2022, 1, 1))
    end_date = st.sidebar.date_input("End Date", datetime.date.today())
    # Button to generate reports
    if st.sidebar.button("(Re)generate Reports"):
        # Generate reports based on start and end date
        with st.spinner("Generating reports..."):
            start_date_str = start_date.strftime('%Y-%m-%d')
            end_date_str = end_date.strftime('%Y-%m-%d')
            generate_finance_reports(start_date_str, end_date_str)
        st.success("Reports generated successfully!")

    st.sidebar.markdown("## Choose the report type and product")

    reports_helper = ReportsHelper()

    # Get the report type and product selection from the sidebar
    report_type_formatted = st.sidebar.selectbox("Report Type",
                                                 reports_helper.general_reports +
                                                 reports_helper.product_reports)

    report_type = report_type_formatted.lower().replace(" ", "_")
    product_name = None
    if report_type in reports_helper.product_reports:
        product_name = st.sidebar.selectbox("Product", reports_helper.available_products)

    # Display the original table (csv) on the left side of the screen
    col1, col2 = st.columns([1, 1])

    with col1:
        st.subheader("Table Explorer")
        filepath = os.path.abspath(reports_helper.get_report_filepath(report_type, product_name))
        if not os.path.exists(filepath):
            st.write(f"Report not found: {filepath}")
        else:
            # Reuse the already-resolved filepath rather than resolving the
            # report path a second time.
            df = pd.read_csv(filepath, decimal=".")
            st.write(df)

    with col2:
        # NOTE: `filepath` (and `df` on the success path) deliberately leak out
        # of the col1 block above — `with st.columns(...)` does not open a new
        # Python scope.
        if not os.path.exists(filepath):
            st.write(f"Report not found: {filepath}")
        else:
            # Calculate the sum of all number columns, excluding id/type columns
            # whose sums are meaningless.
            number_columns = df.select_dtypes(include=["int", "float"]).columns
            number_columns = [col for col in number_columns if
                              not col.lower().endswith("_id") and not col.lower().endswith("_type")]
            sum_values = df[number_columns].sum()

            if report_type == "management_summary":
                # Display the number columns as a line chart over the dates
                st.subheader("Line Chart")
                line_columns = st.multiselect("Select columns for Line Chart", number_columns, default=number_columns)

                if line_columns:
                    line_fig = go.Figure()
                    for column in line_columns:
                        # go.Scatter(mode="lines") replaces the deprecated go.Line alias.
                        line_fig.add_trace(go.Scatter(x=df["Date"], y=df[column], name=column, mode="lines"))

                    line_fig.update_layout(hovermode="x unified")  # Set the hover mode
                    st.plotly_chart(line_fig)

            # Display the sum of number columns as a bar chart
            st.subheader("Totals")
            fig = go.Figure()
            fig.add_trace(go.Bar(x=sum_values.index, y=sum_values.values, name="", showlegend=False))
            fig.update_layout(xaxis_title="Column", yaxis_title="Sum")
            st.plotly_chart(fig)
183
+
184
if __name__ == "__main__":
    # Allow launching the dashboard module directly.
    run_app()
ffi_reports/generate_reports.py ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Code to generate reports from the sqlite3 database.
3
+ """
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import Optional
7
+ import os
8
+ import datetime
9
+ import sqlite3 as sl
10
+
11
+ import sqlparse
12
+ import pandas as pd
13
+ from loguru import logger as lg
14
+
15
+
16
class Report(ABC):
    """
    Abstract base class for CSV reports generated from the sqlite3 database.

    Subclasses implement generate_sql_string(); the base class handles
    connecting, executing the query, dumping the formatted SQL to
    ./data/sql_scripts, and writing the resulting DataFrame to
    ./data/reports/<report_name>.csv.
    """

    def __init__(self, db_path: str, report_name: str, product_name: Optional[str] = None):
        """
        Args:
            db_path (str): path to the sqlite3 database file.
            report_name (str): base report name (prefixed with the product name
                when product_name is given).
            product_name (str, optional): product the report is scoped to; must
                start with "Prod" so the recon table name can be derived.

        Raises:
            ValueError: if product_name does not start with "Prod", or any
                referenced table does not exist in the database.
        """
        self.df = None
        self.db_path = db_path
        self.db_conn = None
        self.db_cursor = None

        self.product_name = product_name
        self.transactions_table_name = "transactions"
        self.users_table_name = "users"
        self.third_party_table_name = self.__get_third_party_table_name()

        if self.product_name:
            self.report_name = f"{product_name}_{report_name}"
        else:
            self.report_name = report_name

        self.report_path = f"./data/reports/{self.report_name}.csv"
        os.makedirs(os.path.dirname(self.report_path), exist_ok=True)

        # Fail fast if any table the report relies on is missing.
        tables = [self.transactions_table_name, self.users_table_name, self.third_party_table_name]
        for table in tables:
            if table is not None:
                self.__check_table_existence(table)

    def __get_third_party_table_name(self):
        # Derive e.g. "tpa_recon" from "ProdA"; None when no product is set.
        if self.product_name:
            if not self.product_name.startswith("Prod"):
                raise ValueError(f"Product name {self.product_name} does not start with 'Prod'.")
            product_letter = self.product_name.replace("Prod", "").lower()
            third_party_table_name = f"tp{product_letter}_recon"
        else:
            third_party_table_name = None
        return third_party_table_name

    def __check_table_existence(self, table_name):
        self.connect()
        try:
            # Parameterized query instead of f-string interpolation.
            self.db_cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name=?;", (table_name,))
            table_names = self.db_cursor.fetchall()
            if len(table_names) == 0:
                raise ValueError(
                    f"Table {table_name} does not exist")
        finally:
            # Always release the connection, even when the table is missing
            # (previously the raise skipped the disconnect).
            self.disconnect()
        return True

    def __write_sql_string(self, sql_string):
        # Format before writing so the dumped script is readable.
        sql_string = sqlparse.format(sql_string, reindent=True, keyword_case="upper")

        output_path = f"./data/sql_scripts/{self.report_name}.sql"
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        with open(output_path, "w+") as f:
            f.write(sql_string)
        lg.debug(f"SQL string written to: {output_path}")

    def connect(self):
        """
        Connect to the sqlite3 database.
        """
        self.db_conn = sl.connect(self.db_path)
        self.db_cursor = self.db_conn.cursor()

    def disconnect(self):
        """
        Disconnect from the sqlite3 database.
        """
        self.db_conn.close()
        # Reset the handles: __init__'s table checks connect/disconnect, and a
        # closed-but-non-None connection would otherwise slip past generate()'s
        # guard and fail later with a sqlite ProgrammingError.
        self.db_conn = None
        self.db_cursor = None

    def generate(self):
        """Execute the report SQL and store the result as a DataFrame in self.df."""
        if self.db_conn is None or self.db_cursor is None:
            raise ValueError("Database connection not established. Call connect() first.")

        sql_string = self.generate_sql_string()
        self.__write_sql_string(sql_string)

        # Execute and save as df
        self.db_cursor.execute(sql_string)
        columns = [description[0] for description in self.db_cursor.description]
        result = self.db_cursor.fetchall()
        self.df = pd.DataFrame(result, columns=columns)

    def to_csv(self):
        """Round numeric columns to 2 decimals and write the report to self.report_path."""
        if self.df is None:
            raise ValueError("Report has not been generated yet. Call generate() first.")

        # Format number columns with 2 decimal precision
        number_columns = [column for column in self.df.columns if self.df[column].dtype in [int, float]]
        self.df[number_columns] = self.df[number_columns].round(2)
        self.df.to_csv(self.report_path, index=False, header=self.df.columns)
        lg.debug(f"Report generated at: {self.report_path}")

    @abstractmethod
    def generate_sql_string(self):
        """Return the SQL query string for this report."""

    def __call__(self):
        """Connect, generate the report, write the CSV, and disconnect."""
        self.connect()
        self.generate()
        self.to_csv()
        self.disconnect()
120
+
121
class ProductSummaryReport(Report):
    """
    Generates a summary report for successful product transactions with one line
    per day for the given date range.

    Args:
        product_name (str): The name of the product.
        db_path (str): The path to the database.
        start_date (str, optional): The start date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
        end_date (str, optional): The end date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
    """

    def __init__(self, product_name: str, db_path: str, start_date: Optional[str] = None,
                 end_date: Optional[str] = None):
        # Plain string: the previous f-string had no placeholders.
        report_name = "summary_report"
        super().__init__(db_path, report_name, product_name=product_name)

        self.start_date = start_date
        self.end_date = end_date

    def generate_sql_string(self) -> str:
        """
        Generates the SQL string for the summary report.

        Returns:
            str: The SQL string for the summary report.
        """
        # Totals are split by user type; the INNER JOINs mean only transactions
        # with a matching user and a matching recon row are summed.
        sql_string = f"""
        SELECT
            DATE(t.completed_at) AS Date,
            SUM(CASE WHEN u.type = 0 AND t.product = '{self.product_name}' THEN t.amount ELSE 0 END) AS Total_Type0_Amount,
            SUM(CASE WHEN u.type = 1 AND t.product = '{self.product_name}' THEN t.amount ELSE 0 END) AS Total_Type1_Amount,
            SUM(CASE WHEN t.product = '{self.product_name}' THEN t.amount ELSE 0 END) AS Total_Amount,
            SUM(CASE WHEN t.product = '{self.product_name}' THEN tp.amount ELSE 0 END) AS Total_TP_Amount
        FROM
            {self.transactions_table_name} t
        JOIN
            {self.users_table_name} u ON t.user_id = u.id
        JOIN
            {self.third_party_table_name} tp ON t.id = tp.id
        WHERE t.status = 'SUCCESS'
        """

        if self.start_date and self.end_date:
            sql_string += f"AND t.completed_at >= '{self.start_date}' AND t.completed_at <= '{self.end_date}'\n"

        sql_string += "GROUP BY Date"

        return sql_string
170
+
171
class ExceptionReportMissingRecon(Report):
    """
    Generates an exception report listing any transactions in the FFI's DB that
    are not present in the third-party recon reports for the given date range.

    Args:
        product_name (str): The name of the product.
        db_path (str): The path to the database.
        start_date (str, optional): The start date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
        end_date (str, optional): The end date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
    """

    def __init__(self, product_name: str, db_path: str, start_date: Optional[str] = None,
                 end_date: Optional[str] = None):
        # Plain string: the previous f-string had no placeholders.
        report_name = "exception_report_missing_recon"
        super().__init__(db_path, report_name, product_name=product_name)

        self.start_date = start_date
        self.end_date = end_date

    def generate_sql_string(self) -> str:
        """
        Generates the SQL string for the exception report.

        Returns:
            str: The SQL string for the exception report.
        """
        # LEFT JOIN + "tp.id IS NULL" keeps only transactions without a recon row.
        sql_string = f"""
        SELECT
            t.completed_at AS Completed_At,
            t.id AS Transaction_ID,
            t.amount AS Amount,
            t.user_id AS User_ID,
            u.type AS User_Type,
            t.status AS Status
        FROM
            {self.transactions_table_name} t
        LEFT JOIN
            {self.users_table_name} u ON t.user_id = u.id
        LEFT JOIN
            {self.third_party_table_name} tp ON t.id = tp.id
        WHERE
            t.product = '{self.product_name}' AND tp.id IS NULL
        """

        if self.start_date and self.end_date:
            sql_string += f" AND t.completed_at >= '{self.start_date}' AND t.completed_at <= '{self.end_date}'\n"

        return sql_string
221
+
222
class ExceptionReportMissingTransactions(Report):
    """
    Generates an exception report listing any transactions in the third-party
    recon reports that are not present in FFI's DB for the given date range.

    Args:
        product_name (str): The name of the product.
        db_path (str): The path to the database.
        start_date (str, optional): The start date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
        end_date (str, optional): The end date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
    """

    def __init__(self, product_name: str, db_path: str, start_date: Optional[str] = None,
                 end_date: Optional[str] = None):
        # Plain string: the previous f-string had no placeholders.
        report_name = "exception_report_missing_transactions"
        super().__init__(db_path, report_name, product_name=product_name)

        self.start_date = start_date
        self.end_date = end_date

    def generate_sql_string(self) -> str:
        """
        Generates the SQL string for the exception report.

        Returns:
            str: The SQL string for the exception report.
        """
        # LEFT JOIN from the recon table; "t.id IS NULL" keeps recon rows with
        # no matching FFI transaction.
        sql_string = f"""
        SELECT
            tp.timestamp AS Timestamp,
            tp.id AS Transaction_ID,
            tp.amount AS Amount
        FROM
            {self.third_party_table_name} tp
        LEFT JOIN
            {self.transactions_table_name} t ON tp.id = t.id
        """

        if self.start_date and self.end_date:
            sql_string += f"WHERE t.id IS NULL AND tp.timestamp >= '{self.start_date}' AND tp.timestamp <= '{self.end_date}'\n"
        else:
            # Plain string: the previous f-string had no placeholders.
            sql_string += "WHERE t.id IS NULL\n"

        return sql_string
267
+
268
class ExceptionReportAmountMismatch(Report):
    """
    Exception report: transactions present in both FFI's DB and the third-party
    recon records whose amounts disagree, optionally restricted to a date range.

    Args:
        db_path (str): The path to the database.
        product_name (str): The name of the product.
        start_date (str, optional): The start date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
        end_date (str, optional): The end date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
    """

    def __init__(self, product_name: str, db_path: str, start_date: Optional[str] = None,
                 end_date: Optional[str] = None):
        report_name = "exception_report_amount_mismatch"
        super().__init__(db_path, report_name, product_name=product_name)
        self.start_date = start_date
        self.end_date = end_date

    def generate_sql_string(self) -> str:
        """
        Build and return the SQL string for the amount-mismatch exception report.

        Returns:
            str: The SQL string for the exception report.
        """
        # Rows where the recon amount differs from the FFI amount; recon rows
        # are attached via LEFT JOIN, so unmatched transactions (NULL tp.amount)
        # are excluded by the <> comparison.
        base_query = f"""
        SELECT
            t.completed_at AS Completed_At,
            t.id AS Transaction_ID,
            t.amount AS FFI_Amount,
            tp.amount AS TP_Amount,
            t.user_id AS User_ID,
            u.type AS User_Type,
            t.status AS Status
        FROM
            {self.transactions_table_name} t
        LEFT JOIN
            {self.users_table_name} u ON t.user_id = u.id
        LEFT JOIN
            {self.third_party_table_name} tp ON t.id = tp.id
        WHERE t.amount <> tp.amount
        """

        date_filter = ""
        if self.start_date and self.end_date:
            date_filter = f" AND t.completed_at >= '{self.start_date}' AND t.completed_at <= '{self.end_date}'\n"

        return base_query + date_filter
318
+
319
class ManagementSummaryReport(Report):
    """
    Generates a management summary report for the most recent `nr_days` days in
    the database, providing the daily gross transaction value per product plus
    the overall total.

    Args:
        db_path (str): The path to the database.
        nr_days (int): Size of the reporting window in days. Defaults to 30.
    """

    def __init__(self, db_path: str, nr_days: int = 30):
        self.today = datetime.date.today()
        today_str = self.today.strftime("%Y-%m-%d")
        # Bug fix: nr_days was accepted but ignored — the window was hard-coded
        # to 30 days in generate_sql_string(). Store it so it is actually used.
        self.nr_days = nr_days
        report_name = f"management_summary_report_{today_str}"
        super().__init__(db_path, report_name)

    def generate_sql_string(self) -> str:
        """
        Generates the SQL string for the management summary report.

        Returns:
            str: The SQL string for the management summary report.
        """
        # Discover the products present so the report adapts to the data.
        query = f"SELECT DISTINCT product FROM {self.transactions_table_name};"
        self.db_cursor.execute(query)
        result = self.db_cursor.fetchall()
        unique_products = [row[0] for row in result]

        # The window ends at the latest completion date in the database (not
        # today), so the report stays meaningful for historical data.
        query = f"SELECT MAX(completed_at) FROM {self.transactions_table_name};"
        self.db_cursor.execute(query)
        max_completion_date_result = self.db_cursor.fetchone()[0]
        # NOTE(review): assumes completed_at is serialized as
        # '%Y-%m-%d %H:%M:%S' — confirm the writer never stores fractional seconds.
        max_completion_date_result_dt = datetime.datetime.strptime(max_completion_date_result, "%Y-%m-%d %H:%M:%S")
        start_date = max_completion_date_result_dt - datetime.timedelta(days=self.nr_days)
        end_date = max_completion_date_result_dt

        # One gross-value column per product.
        sum_columns_string = ",\n".join([
            f"SUM(CASE WHEN t.product = '{product_name}' THEN t.amount ELSE 0 END) AS {product_name}_Gross_Transaction_Value"
            for product_name in unique_products])

        sql_string = f"""
        SELECT
            DATE(t.completed_at) AS Date,
            {sum_columns_string},
            SUM(t.amount) AS Total_Gross_Transaction_Value
        FROM
            {self.transactions_table_name} t
        WHERE
            t.completed_at >= '{start_date}' AND t.completed_at <= '{end_date}'
        GROUP BY
            Date;
        """
        return sql_string
372
+
373
def generate_finance_reports(start_date: str, end_date: str):
    """
    Generates finance reports for the given date range.

    Args:
        start_date (str): The start date for the date range (format: 'YYYY-MM-DD').
        end_date (str): The end date for the date range (format: 'YYYY-MM-DD').

    """
    # "{time}" is expanded by loguru into a per-run timestamp.
    lg.add("./logs/generate_reports_log_{time}.log")
    lg.info("Started generating finance reports...")

    # columns = ['id', 'created_at', 'product', 'amount', 'status', 'user_id', 'completed_at']
    DB_PATH = "./data/generated_txns.db"
    PRODUCTS_LIST = ["ProdA", "ProdB"]
    lg.debug(f"DB_PATH: {DB_PATH}")
    lg.debug(f"PRODUCTS_LIST: {PRODUCTS_LIST}")

    # Per-product summary reports.
    lg.info("Generating summary reports...")
    for product in PRODUCTS_LIST:
        summary = ProductSummaryReport(product, DB_PATH, start_date, end_date)
        summary()

    # Per-product exception reports, in the same order as before.
    lg.info("Generating exception reports...")
    exception_report_classes = (
        ExceptionReportMissingRecon,
        ExceptionReportMissingTransactions,
        ExceptionReportAmountMismatch,
    )
    for product in PRODUCTS_LIST:
        for report_cls in exception_report_classes:
            report_cls(product, DB_PATH, start_date, end_date)()

    # One overall management summary across all products.
    lg.info("Generating management summary report...")
    ManagementSummaryReport(DB_PATH)()


if __name__ == '__main__':
    default_start_date = "2021-02-01"
    default_end_date = "2021-05-31"
    generate_finance_reports(default_start_date, default_end_date)
main.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This is the main file for the FFI Reports Dashboard.
3
+ """
4
+
5
+ from ffi_reports import dashboard
6
+
7
+ if __name__ == '__main__':
8
+ dashboard.run_app()
pytest.ini ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [pytest]
2
+ testpaths = tests
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ loguru==0.7.0
2
+ pandas==2.0.1
3
+ sqlparse==0.4.4
4
+ matplotlib==3.7.1
5
+ streamlit==1.22.0
6
+ plotly==5.14.1
7
+ pytest==7.3.1
tests/__init__.py ADDED
File without changes
tests/test_generate_reports.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This module contains tests for the generate_reports module.
3
+ Only one TestClass has been implemented for illustration purposes.
4
+ """
5
+
6
+ import pytest
7
+ import pandas as pd
8
+ from datetime import datetime
9
+ from ffi_reports.generate_reports import ProductSummaryReport
10
+
11
+
12
class TestProductSummaryReport:
    """Smoke tests for ProductSummaryReport over a fixed date window."""

    DEFAULT_DB_PATH = "./data/generated_txns.db"

    @pytest.fixture(scope="class")
    def summary_report(self):
        # Build and run one report per test class; all tests share the result.
        report = ProductSummaryReport(
            "ProdA",
            self.DEFAULT_DB_PATH,
            start_date="2023-03-01",
            end_date="2023-05-31",
        )
        report.connect()
        report.generate()
        return report

    @staticmethod
    def _as_date(value):
        # Report dates are ISO-formatted strings; compare as date objects.
        return datetime.strptime(value, "%Y-%m-%d").date()

    def test_minimum_date_above_start_date(self, summary_report):
        earliest = summary_report.df["Date"].min()
        assert self._as_date(earliest) >= self._as_date(summary_report.start_date)

    def test_maximum_date_below_end_date(self, summary_report):
        latest = summary_report.df["Date"].max()
        assert self._as_date(latest) <= self._as_date(summary_report.end_date)

    def test_summary_report_generation(self, summary_report):
        frame = summary_report.df
        assert isinstance(frame, pd.DataFrame)
        assert len(frame) > 0
        expected_columns = [
            "Date",
            "Total_Type0_Amount",
            "Total_Type1_Amount",
            "Total_Amount",
            "Total_TP_Amount",
        ]
        assert all(column in frame.columns for column in expected_columns)