spanmandewit commited on
Commit
82debdf
·
1 Parent(s): ab3e833

initial commit

Browse files
.gitignore ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
105
+ __pypackages__/
106
+
107
+ # Celery stuff
108
+ celerybeat-schedule
109
+ celerybeat.pid
110
+
111
+ # SageMath parsed files
112
+ *.sage.py
113
+
114
+ # Environments
115
+ .env
116
+ .venv
117
+ env/
118
+ venv/
119
+ ENV/
120
+ env.bak/
121
+ venv.bak/
122
+
123
+ # Spyder project settings
124
+ .spyderproject
125
+ .spyproject
126
+
127
+ # Rope project settings
128
+ .ropeproject
129
+
130
+ # mkdocs documentation
131
+ /site
132
+
133
+ # mypy
134
+ .mypy_cache/
135
+ .dmypy.json
136
+ dmypy.json
137
+
138
+ # Pyre type checker
139
+ .pyre/
140
+
141
+ # pytype static type analyzer
142
+ .pytype/
143
+
144
+ # Cython debug symbols
145
+ cython_debug/
146
+
147
+ # PyCharm
148
+ # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can
149
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
150
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
151
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
152
+ .idea/
153
+
154
+
155
+ # project specific
156
+ data/
157
+ *.pdf
db_utils/__init__.py ADDED
File without changes
db_utils/generate_db.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Code to generate the sqlite3 database.
3
+
4
+ Adjusted a few things from the original code to make it work with this case:
5
+ - Changed the method to use parameters instead of directly parsing arguments
6
+ """
7
+ import os
8
+ import argparse
9
+ import datetime
10
+ import pandas as pd
11
+ import random
12
+ import sqlite3 as sl
13
+ import sys
14
+
15
+
16
class TxnGenerator:
    """
    Generate a given number of users and transactions with randomly determined
    properties and write them to the sqlite3 database.

    Alongside the internal (FFI) transaction records, third-party recon records
    are generated into per-product tables (tpa_recon / tpb_recon) with
    occasional deliberate discrepancies — missing rows, amounts off by a cent,
    settlement-timestamp variance — to simulate reconciliation issues.
    """

    def __init__(self, db_conn):
        """
        Args:
            db_conn: an open sqlite3 connection the generated tables are written to.
        """
        self.db_conn = db_conn

    def generate(self, num_users, num_txns):
        """
        Generate `num_users` users and exactly `num_txns` transactions and
        write them (plus the recon tables) to the database.

        Args:
            num_users (int): number of user rows to create.
            num_txns (int): number of transaction rows to create.
        """
        # Generate num_users users; roughly a 3:1 split between type 0 and type 1.
        user_ids = []
        user_types = []
        for i in range(num_users):
            user_ids.append(i)
            user_types.append(random.choices([0, 1], weights=[3, 1])[0])
        df_users = pd.DataFrame({"id": user_ids, "type": user_types}).set_index("id")
        # if_exists="replace" makes regeneration idempotent instead of raising
        # ValueError when the table already exists.
        df_users.to_sql("users", self.db_conn, if_exists="replace")

        # Generate num_txns transactions, starting from the beginning of 2022.
        txn_id = 0
        timestamp = datetime.datetime(2022, 1, 1, 0, 0, 0)
        txn_ids = []
        txn_created_at = []
        txn_products = []
        txn_amounts = []
        txn_statuses = []
        txn_user_ids = []
        txn_completed_at = []
        tp_ids = []
        tp_amounts = []
        tp_timestamps = []
        tp_products = []
        while txn_id < num_txns:
            # Every ten seconds, 50/50 create a transaction or do nothing.
            action = random.choices(["skip", "txn"], weights=[1, 1])[0]
            if action == "txn":
                txn_ids.append(txn_id)
                txn_created_at.append(timestamp)
                txn_products.append(random.choices(["ProdA", "ProdB"], weights=[3, 5])[0])
                # Amounts are roughly N(20, 10), clamped at a minimum of 0.01.
                txn_amounts.append(max(0.01, round(random.normalvariate(20, 10), 2)))
                # Most txns succeed, some fail, a few get stuck in PENDING.
                txn_statuses.append(random.choices(["SUCCESS", "FAILURE", "PENDING"], weights=[1000, 20, 2])[0])
                txn_user_ids.append(random.randrange(num_users))
                # Transactions normally settle within a few seconds, but can take longer. Cap at 3 days.
                txn_completed_at.append(
                    timestamp + datetime.timedelta(seconds=min(random.lognormvariate(3, 10), 72 * 60 * 60)))
                # Rarely the third parties just don't record the transaction.
                tp_action = random.choices(["skip", "txn"], weights=[1, 2000])[0]
                if tp_action == "txn" and txn_statuses[-1] != "FAILURE":
                    # Note failed transactions are not recorded. Some transactions are
                    # reported in recon, but FFI think are stuck pending.
                    tp_ids.append(txn_id)
                    # Rarely the third parties are out by a penny/cent.
                    tp_amounts.append(txn_amounts[-1] + random.choices([-0.01, 0, 0.01], weights=[1, 2000, 1])[0])
                    # Third party settlement timestamps are usually around when FFI
                    # thinks the txn completed, but with some variance.
                    tp_timestamps.append(
                        txn_completed_at[-1] + datetime.timedelta(seconds=random.normalvariate(0, 180)))
                    tp_products.append(txn_products[-1])
                txn_id += 1
            timestamp += datetime.timedelta(seconds=10)

        # Write the FFI transaction records to the DB.
        df_txns = pd.DataFrame(
            {"id": txn_ids, "created_at": txn_created_at, "product": txn_products, "amount": txn_amounts,
             "status": txn_statuses, "user_id": txn_user_ids, "completed_at": txn_completed_at}).set_index("id")
        df_txns.to_sql("transactions", self.db_conn, if_exists="replace")

        # Split the third party recon records by product, then write to their respective DB tables.
        df_tp_txns = pd.DataFrame(
            {"id": tp_ids, "amount": tp_amounts, "timestamp": tp_timestamps, "product": tp_products}).set_index("id")
        df_tpa_txns = df_tp_txns[df_tp_txns["product"] == "ProdA"].drop(columns=["product"])
        df_tpb_txns = df_tp_txns[df_tp_txns["product"] == "ProdB"].drop(columns=["product"])
        df_tpa_txns.to_sql("tpa_recon", self.db_conn, if_exists="replace")
        df_tpb_txns.to_sql("tpb_recon", self.db_conn, if_exists="replace")

    def report(self):
        """Print overall stats (row count and created_at range) for the transactions table."""
        report_df = pd.read_sql(
            "SELECT MIN(created_at) AS start, MAX(created_at) AS end, COUNT(*) AS count FROM transactions",
            self.db_conn)
        print(
            f"Generated {report_df.at[0, 'count']} transactions from {report_df.at[0, 'start']} to {report_df.at[0, 'end']}.")
101
+
102
def main(num_users, num_txns, data_dir="./data", filename="generated_txns.db"):
    """
    Create the sqlite3 database file and populate it with generated data.

    Args:
        num_users (int): number of users to generate.
        num_txns (int): number of transactions to generate.
        data_dir (str): directory the database file is created in
            (generalized from the previous hard-coded "./data"; the default
            preserves the old behavior).
        filename (str): name of the database file inside `data_dir`.
    """
    os.makedirs(data_dir, exist_ok=True)

    db_conn = sl.connect(os.path.join(data_dir, filename))
    gen = TxnGenerator(db_conn)
    gen.generate(num_users, num_txns)
    gen.report()
112
+
113
if __name__ == "__main__":
    # Seed the RNG so repeated runs produce identical synthetic data.
    random.seed(2022)  # Aid repeatability

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("num_users", type=int, help="Number of users to generate")
    arg_parser.add_argument("num_txns", type=int, help="Number of transactions to generate")
    parsed = arg_parser.parse_args()

    sys.exit(main(parsed.num_users, parsed.num_txns))
db_utils/summarize_db.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Code to quickly explore the sqlite3 database.
3
+ """
4
+
5
+ import argparse
6
+ import os
7
+ import datetime
8
+ import pandas as pd
9
+ import random
10
+ import sqlite3 as sl
11
+ import sys
12
+
13
+
14
def print_sep():
    """Print a horizontal separator line to stdout."""
    separator = '------------------------------------------'
    print(separator)
18
class DbSummarizer:
    """
    Summarizes an sqlite3 database by printing:
    1. Number of tables and their names
    2. Number of rows in each table
    3. Column names and the first 10 rows of each table

    Each table preview is also written as a CSV under ./data/tables.
    """

    def __init__(self, path: str = "./data/generated_txns.db"):
        """
        Initialize the DbSummarizer object.

        Args:
            path (str): Path to the sqlite3 database.
        """
        self.path = path
        self.db_conn = None
        self.db_cursor = None

    def connect(self):
        """Open the sqlite3 connection and create a cursor."""
        self.db_conn = sl.connect(self.path)
        self.db_cursor = self.db_conn.cursor()

    def disconnect(self):
        """Close the sqlite3 connection and drop the handles."""
        self.db_conn.close()
        # Reset so a stale (closed) connection is never mistaken for a live one.
        self.db_conn = None
        self.db_cursor = None

    def get_table_names(self):
        """Return the table names as a list of 1-tuples (raw sqlite fetchall rows)."""
        self.db_cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        return self.db_cursor.fetchall()

    def summarize(self):
        """
        Summarize the sqlite3 database: print table names, row counts, and a
        first-10-rows preview per table (also saved as CSV under ./data/tables).
        """
        table_names = self.get_table_names()
        print(f"This db has {len(table_names)} tables.")
        print(f"Table names: {table_names}")
        for table_name in table_names:
            table_name_str = table_name[0]
            print_sep()
            print(f"Table name: {table_name_str}")
            print_sep()

            # Row count. Table names come from sqlite_master, so interpolation
            # here cannot inject user input.
            self.db_cursor.execute(f"SELECT COUNT(*) FROM {table_name_str};")
            nr_rows = self.db_cursor.fetchone()[0]
            print(f"Number of rows: {nr_rows}")
            print_sep()

            # Column names and first 10 rows.
            self.db_cursor.execute(f"SELECT * FROM {table_name_str} LIMIT 10;")
            column_names = [description[0] for description in self.db_cursor.description]
            print(f"Column names: {column_names}")
            first_10_rows = self.db_cursor.fetchall()
            df_first_10_rows = pd.DataFrame(first_10_rows, columns=column_names)
            tables_dir = "./data/tables"
            os.makedirs(tables_dir, exist_ok=True)
            # Build from tables_dir instead of repeating the directory literal.
            output_path = os.path.join(tables_dir, f"{table_name_str}_first_10_rows.csv")
            df_first_10_rows.to_csv(output_path, index=False)
            print("First 10 rows:")
            print(df_first_10_rows)
            print_sep()
            print()

    def __call__(self):
        """Connect to the db, summarize it, and disconnect."""
        self.connect()
        self.summarize()
        self.disconnect()
103
+
104
if __name__ == "__main__":
    # Show where we are running from, since the default db path is relative.
    print(os.getcwd())
    cli = argparse.ArgumentParser()
    cli.add_argument("--path", type=str, default="./data/generated_txns.db")
    cli_args = cli.parse_args()

    summarizer = DbSummarizer(path=cli_args.path)
    summarizer()
ffi_reports/__init__.py ADDED
File without changes
ffi_reports/dashboard.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Code to run streamlit dashboard
3
+ """
4
+
5
+ import glob
6
+ from typing import Optional
7
+ import os
8
+ import time
9
+ import datetime
10
+
11
+ import streamlit as st
12
+ import pandas as pd
13
+ import plotly.graph_objects as go
14
+
15
+ from ffi_reports.generate_reports import generate_finance_reports
16
+ from db_utils import generate_db
17
+
18
+
19
class ReportsHelper:
    """
    Helper that locates report CSV files and, on first run, generates the
    backing transactions database (which may take a while).
    """

    DEFAULT_DB_PATH = "./data/generated_txns.db"

    AVAILABLE_PRODUCTS = [
        "ProdA",
        "ProdB"
    ]

    DEFAULT_GENERAL_REPORTS = [
        "management_summary"
    ]

    DEFAULT_PRODUCT_REPORTS = [
        "summary_report",
        "exception_report_missing_transactions",
        "exception_report_missing_recon",
        "exception_report_amount_mismatch"
    ]

    def __init__(self, reports_dir: Optional[str] = "./data/reports"):
        """
        Initialize the ReportsHelper object which helps find the reports.

        Args:
            reports_dir (str, optional): The directory where the reports are stored.
        """
        self.reports_dir = reports_dir
        self.product_reports = ReportsHelper.DEFAULT_PRODUCT_REPORTS
        self.general_reports = ReportsHelper.DEFAULT_GENERAL_REPORTS
        self.available_products = ReportsHelper.AVAILABLE_PRODUCTS
        self.db_path = ReportsHelper.DEFAULT_DB_PATH

        if not os.path.exists(self.db_path):
            load_str = f"Generating database at: {self.db_path} " \
                       f"since running for the first time. This may take a while..."
            print(load_str)
            with st.spinner(load_str):
                generate_db.main(1000, 1000000)
        else:
            print(f"Database found at: {self.db_path}")

    def get_report_filepath(self, report_type: str, product_name: Optional[str] = None,
                            reports_dir: Optional[str] = None) -> str:
        """
        Get the filepath for a report.

        Args:
            report_type (str): The type of report to get the filepath for. One of:
                - management_summary
                - summary_report
                - exception_report_missing_transactions
                - exception_report_missing_recon
                - exception_report_amount_mismatch
            product_name (str): The name of the product to get the report for.
                Required for product reports.
            reports_dir (str, optional): The directory where the reports are
                stored. Defaults to the directory given at construction time
                (previously this default was duplicated as a hard-coded literal
                and self.reports_dir was ignored).

        Returns:
            str: path to the report CSV file.

        Raises:
            ValueError: for an unknown report_type, or a product report
                requested without a product_name.
        """
        if reports_dir is None:
            reports_dir = self.reports_dir
        report_type = report_type.lower()
        if report_type in self.product_reports and product_name is None:
            raise ValueError("'product_name' must be provided for product reports.")

        if report_type == 'management_summary':
            # Management summaries are date-stamped; pick the most recently
            # created one, falling back to the bare name when none exist yet.
            management_summaries = glob.glob(os.path.join(reports_dir, "management_summary_*.csv"))
            if len(management_summaries) == 0:
                report_file_name = report_type + ".csv"
            else:
                report_file_name = os.path.basename(max(management_summaries, key=os.path.getctime))
        elif report_type in self.product_reports:
            report_file_name = f"{product_name}_{report_type}.csv"
        else:
            raise ValueError(f"Invalid 'report_type': {report_type}")

        return os.path.join(reports_dir, report_file_name)
96
+
97
def mock_progress_bar(max_seconds=15, max_steps=100):
    """
    Drive a Streamlit progress bar over roughly `max_seconds`, in `max_steps`
    increments.

    Bug fix: the loop previously iterated a hard-coded range(100), ignoring the
    `max_steps` parameter (over- or under-running the bar for any other value).
    """
    progress_bar = st.progress(0)
    seconds_per_step = max_seconds / max_steps
    for step in range(max_steps):
        # Simulated work for this step.
        time.sleep(seconds_per_step)
        # Scale completed steps to the 0-100 range; identical to the old
        # behavior (1..100) when max_steps is the default 100.
        progress_bar.progress(int((step + 1) * 100 / max_steps))
105
+
106
# Streamlit app
def run_app():
    """Render the report dashboard: sidebar controls, report table, and charts."""
    st.set_page_config(layout="wide")

    # Set the title and page layout for the report dashboard
    st.title("Report Dashboard")
    st.sidebar.title(" :gear:️ Options")

    st.sidebar.markdown("## Select Date Range")
    start_date = st.sidebar.date_input("Start Date", datetime.date(2022, 1, 1))
    end_date = st.sidebar.date_input("End Date", datetime.date.today())
    # Button to generate reports
    if st.sidebar.button("(Re)generate Reports"):
        # Generate reports based on start and end date
        with st.spinner("Generating reports..."):
            start_date_str = start_date.strftime('%Y-%m-%d')
            end_date_str = end_date.strftime('%Y-%m-%d')
            generate_finance_reports(start_date_str, end_date_str)
        st.success("Reports generated successfully!")

    st.sidebar.markdown("## Choose the report type and product")

    reports_helper = ReportsHelper()

    # Get the report type and product selection from the sidebar
    report_type_formatted = st.sidebar.selectbox("Report Type",
                                                 reports_helper.general_reports +
                                                 reports_helper.product_reports)

    report_type = report_type_formatted.lower().replace(" ", "_")
    product_name = None
    if report_type in reports_helper.product_reports:
        product_name = st.sidebar.selectbox("Product", reports_helper.available_products)

    # Display the original table (csv) on the left side of the screen
    col1, col2 = st.columns([1, 1])

    with col1:
        st.subheader("Table Explorer")
        filepath = os.path.abspath(reports_helper.get_report_filepath(report_type, product_name))
        if not os.path.exists(filepath):
            st.write(f"Report not found: {filepath}")
        else:
            # Reuse the already-resolved filepath rather than resolving the
            # report path a second time.
            df = pd.read_csv(filepath, decimal=".")
            st.write(df)

    with col2:
        # NOTE: `filepath` (and `df` on the success path) deliberately leak out
        # of the col1 block above — `with st.columns(...)` does not open a new
        # Python scope.
        if not os.path.exists(filepath):
            st.write(f"Report not found: {filepath}")
        else:
            # Calculate the sum of all number columns, excluding id/type columns
            # whose sums are meaningless.
            number_columns = df.select_dtypes(include=["int", "float"]).columns
            number_columns = [col for col in number_columns if
                              not col.lower().endswith("_id") and not col.lower().endswith("_type")]
            sum_values = df[number_columns].sum()

            if report_type == "management_summary":
                # Display the number columns as a line chart over the dates
                st.subheader("Line Chart")
                line_columns = st.multiselect("Select columns for Line Chart", number_columns, default=number_columns)

                if line_columns:
                    line_fig = go.Figure()
                    for column in line_columns:
                        # go.Scatter(mode="lines") replaces the deprecated go.Line alias.
                        line_fig.add_trace(go.Scatter(x=df["Date"], y=df[column], name=column, mode="lines"))

                    line_fig.update_layout(hovermode="x unified")  # Set the hover mode
                    st.plotly_chart(line_fig)

            # Display the sum of number columns as a bar chart
            st.subheader("Totals")
            fig = go.Figure()
            fig.add_trace(go.Bar(x=sum_values.index, y=sum_values.values, name="", showlegend=False))
            fig.update_layout(xaxis_title="Column", yaxis_title="Sum")
            st.plotly_chart(fig)
183
+
184
if __name__ == "__main__":
    # Allow launching the dashboard module directly.
    run_app()
ffi_reports/generate_reports.py ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Code to generate reports from the sqlite3 database.
3
+ """
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import Optional
7
+ import os
8
+ import datetime
9
+ import sqlite3 as sl
10
+
11
+ import sqlparse
12
+ import pandas as pd
13
+ from loguru import logger as lg
14
+
15
+
16
class Report(ABC):
    """
    Abstract base class for CSV reports generated from the sqlite3 database.

    Subclasses implement generate_sql_string(); the base class handles
    connecting, executing the query, dumping the formatted SQL to
    ./data/sql_scripts, and writing the resulting DataFrame to
    ./data/reports/<report_name>.csv.
    """

    def __init__(self, db_path: str, report_name: str, product_name: Optional[str] = None):
        """
        Args:
            db_path (str): path to the sqlite3 database file.
            report_name (str): base report name (prefixed with the product name
                when product_name is given).
            product_name (str, optional): product the report is scoped to; must
                start with "Prod" so the recon table name can be derived.

        Raises:
            ValueError: if product_name does not start with "Prod", or any
                referenced table does not exist in the database.
        """
        self.df = None
        self.db_path = db_path
        self.db_conn = None
        self.db_cursor = None

        self.product_name = product_name
        self.transactions_table_name = "transactions"
        self.users_table_name = "users"
        self.third_party_table_name = self.__get_third_party_table_name()

        if self.product_name:
            self.report_name = f"{product_name}_{report_name}"
        else:
            self.report_name = report_name

        self.report_path = f"./data/reports/{self.report_name}.csv"
        os.makedirs(os.path.dirname(self.report_path), exist_ok=True)

        # Fail fast if any table the report relies on is missing.
        tables = [self.transactions_table_name, self.users_table_name, self.third_party_table_name]
        for table in tables:
            if table is not None:
                self.__check_table_existence(table)

    def __get_third_party_table_name(self):
        # Derive e.g. "tpa_recon" from "ProdA"; None when no product is set.
        if self.product_name:
            if not self.product_name.startswith("Prod"):
                raise ValueError(f"Product name {self.product_name} does not start with 'Prod'.")
            product_letter = self.product_name.replace("Prod", "").lower()
            third_party_table_name = f"tp{product_letter}_recon"
        else:
            third_party_table_name = None
        return third_party_table_name

    def __check_table_existence(self, table_name):
        self.connect()
        try:
            # Parameterized query instead of f-string interpolation.
            self.db_cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name=?;", (table_name,))
            table_names = self.db_cursor.fetchall()
            if len(table_names) == 0:
                raise ValueError(
                    f"Table {table_name} does not exist")
        finally:
            # Always release the connection, even when the table is missing
            # (previously the raise skipped the disconnect).
            self.disconnect()
        return True

    def __write_sql_string(self, sql_string):
        # Format before writing so the dumped script is readable.
        sql_string = sqlparse.format(sql_string, reindent=True, keyword_case="upper")

        output_path = f"./data/sql_scripts/{self.report_name}.sql"
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        with open(output_path, "w+") as f:
            f.write(sql_string)
        lg.debug(f"SQL string written to: {output_path}")

    def connect(self):
        """
        Connect to the sqlite3 database.
        """
        self.db_conn = sl.connect(self.db_path)
        self.db_cursor = self.db_conn.cursor()

    def disconnect(self):
        """
        Disconnect from the sqlite3 database.
        """
        self.db_conn.close()
        # Reset the handles: __init__'s table checks connect/disconnect, and a
        # closed-but-non-None connection would otherwise slip past generate()'s
        # guard and fail later with a sqlite ProgrammingError.
        self.db_conn = None
        self.db_cursor = None

    def generate(self):
        """Execute the report SQL and store the result as a DataFrame in self.df."""
        if self.db_conn is None or self.db_cursor is None:
            raise ValueError("Database connection not established. Call connect() first.")

        sql_string = self.generate_sql_string()
        self.__write_sql_string(sql_string)

        # Execute and save as df
        self.db_cursor.execute(sql_string)
        columns = [description[0] for description in self.db_cursor.description]
        result = self.db_cursor.fetchall()
        self.df = pd.DataFrame(result, columns=columns)

    def to_csv(self):
        """Round numeric columns to 2 decimals and write the report to self.report_path."""
        if self.df is None:
            raise ValueError("Report has not been generated yet. Call generate() first.")

        # Format number columns with 2 decimal precision
        number_columns = [column for column in self.df.columns if self.df[column].dtype in [int, float]]
        self.df[number_columns] = self.df[number_columns].round(2)
        self.df.to_csv(self.report_path, index=False, header=self.df.columns)
        lg.debug(f"Report generated at: {self.report_path}")

    @abstractmethod
    def generate_sql_string(self):
        """Return the SQL query string for this report."""

    def __call__(self):
        """Connect, generate the report, write the CSV, and disconnect."""
        self.connect()
        self.generate()
        self.to_csv()
        self.disconnect()
120
+
121
class ProductSummaryReport(Report):
    """
    Generates a summary report for successful product transactions with one line
    per day for the given date range.

    Args:
        product_name (str): The name of the product.
        db_path (str): The path to the database.
        start_date (str, optional): The start date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
        end_date (str, optional): The end date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
    """

    def __init__(self, product_name: str, db_path: str, start_date: Optional[str] = None,
                 end_date: Optional[str] = None):
        # Plain string: the previous f-string had no placeholders.
        report_name = "summary_report"
        super().__init__(db_path, report_name, product_name=product_name)

        self.start_date = start_date
        self.end_date = end_date

    def generate_sql_string(self) -> str:
        """
        Generates the SQL string for the summary report.

        Returns:
            str: The SQL string for the summary report.
        """
        # Totals are split by user type; the INNER JOINs mean only transactions
        # with a matching user and a matching recon row are summed.
        sql_string = f"""
        SELECT
            DATE(t.completed_at) AS Date,
            SUM(CASE WHEN u.type = 0 AND t.product = '{self.product_name}' THEN t.amount ELSE 0 END) AS Total_Type0_Amount,
            SUM(CASE WHEN u.type = 1 AND t.product = '{self.product_name}' THEN t.amount ELSE 0 END) AS Total_Type1_Amount,
            SUM(CASE WHEN t.product = '{self.product_name}' THEN t.amount ELSE 0 END) AS Total_Amount,
            SUM(CASE WHEN t.product = '{self.product_name}' THEN tp.amount ELSE 0 END) AS Total_TP_Amount
        FROM
            {self.transactions_table_name} t
        JOIN
            {self.users_table_name} u ON t.user_id = u.id
        JOIN
            {self.third_party_table_name} tp ON t.id = tp.id
        WHERE t.status = 'SUCCESS'
        """

        if self.start_date and self.end_date:
            sql_string += f"AND t.completed_at >= '{self.start_date}' AND t.completed_at <= '{self.end_date}'\n"

        sql_string += "GROUP BY Date"

        return sql_string
170
+
171
class ExceptionReportMissingRecon(Report):
    """
    Generates an exception report listing any transactions in the FFI's DB that
    are not present in the third-party recon reports for the given date range.

    Args:
        product_name (str): The name of the product.
        db_path (str): The path to the database.
        start_date (str, optional): The start date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
        end_date (str, optional): The end date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
    """

    def __init__(self, product_name: str, db_path: str, start_date: Optional[str] = None,
                 end_date: Optional[str] = None):
        # Plain string: the previous f-string had no placeholders.
        report_name = "exception_report_missing_recon"
        super().__init__(db_path, report_name, product_name=product_name)

        self.start_date = start_date
        self.end_date = end_date

    def generate_sql_string(self) -> str:
        """
        Generates the SQL string for the exception report.

        Returns:
            str: The SQL string for the exception report.
        """
        # LEFT JOIN + "tp.id IS NULL" keeps only transactions without a recon row.
        sql_string = f"""
        SELECT
            t.completed_at AS Completed_At,
            t.id AS Transaction_ID,
            t.amount AS Amount,
            t.user_id AS User_ID,
            u.type AS User_Type,
            t.status AS Status
        FROM
            {self.transactions_table_name} t
        LEFT JOIN
            {self.users_table_name} u ON t.user_id = u.id
        LEFT JOIN
            {self.third_party_table_name} tp ON t.id = tp.id
        WHERE
            t.product = '{self.product_name}' AND tp.id IS NULL
        """

        if self.start_date and self.end_date:
            sql_string += f" AND t.completed_at >= '{self.start_date}' AND t.completed_at <= '{self.end_date}'\n"

        return sql_string
221
+
222
class ExceptionReportMissingTransactions(Report):
    """
    Generates an exception report listing any transactions in the third-party
    recon reports that are not present in FFI's DB for the given date range.

    Args:
        product_name (str): The name of the product.
        db_path (str): The path to the database.
        start_date (str, optional): The start date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
        end_date (str, optional): The end date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
    """

    def __init__(self, product_name: str, db_path: str, start_date: Optional[str] = None,
                 end_date: Optional[str] = None):
        # Plain string: the previous f-string had no placeholders.
        report_name = "exception_report_missing_transactions"
        super().__init__(db_path, report_name, product_name=product_name)

        self.start_date = start_date
        self.end_date = end_date

    def generate_sql_string(self) -> str:
        """
        Generates the SQL string for the exception report.

        Returns:
            str: The SQL string for the exception report.
        """
        # LEFT JOIN from the recon table; "t.id IS NULL" keeps recon rows with
        # no matching FFI transaction.
        sql_string = f"""
        SELECT
            tp.timestamp AS Timestamp,
            tp.id AS Transaction_ID,
            tp.amount AS Amount
        FROM
            {self.third_party_table_name} tp
        LEFT JOIN
            {self.transactions_table_name} t ON tp.id = t.id
        """

        if self.start_date and self.end_date:
            sql_string += f"WHERE t.id IS NULL AND tp.timestamp >= '{self.start_date}' AND tp.timestamp <= '{self.end_date}'\n"
        else:
            # Plain string: the previous f-string had no placeholders.
            sql_string += "WHERE t.id IS NULL\n"

        return sql_string
267
+
268
class ExceptionReportAmountMismatch(Report):
    """
    Exception report: transactions present in both FFI's DB and the third-party
    recon records whose amounts disagree, optionally restricted to a date range.

    Args:
        db_path (str): The path to the database.
        product_name (str): The name of the product.
        start_date (str, optional): The start date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
        end_date (str, optional): The end date for the date range (format: 'YYYY-MM-DD'). Defaults to None.
    """

    def __init__(self, product_name: str, db_path: str, start_date: Optional[str] = None,
                 end_date: Optional[str] = None):
        report_name = "exception_report_amount_mismatch"
        super().__init__(db_path, report_name, product_name=product_name)
        self.start_date = start_date
        self.end_date = end_date

    def generate_sql_string(self) -> str:
        """
        Build and return the SQL string for the amount-mismatch exception report.

        Returns:
            str: The SQL string for the exception report.
        """
        # Rows where the recon amount differs from the FFI amount; recon rows
        # are attached via LEFT JOIN, so unmatched transactions (NULL tp.amount)
        # are excluded by the <> comparison.
        base_query = f"""
        SELECT
            t.completed_at AS Completed_At,
            t.id AS Transaction_ID,
            t.amount AS FFI_Amount,
            tp.amount AS TP_Amount,
            t.user_id AS User_ID,
            u.type AS User_Type,
            t.status AS Status
        FROM
            {self.transactions_table_name} t
        LEFT JOIN
            {self.users_table_name} u ON t.user_id = u.id
        LEFT JOIN
            {self.third_party_table_name} tp ON t.id = tp.id
        WHERE t.amount <> tp.amount
        """

        date_filter = ""
        if self.start_date and self.end_date:
            date_filter = f" AND t.completed_at >= '{self.start_date}' AND t.completed_at <= '{self.end_date}'\n"

        return base_query + date_filter
318
+
319
class ManagementSummaryReport(Report):
    """
    Generates a management summary report for the most recent `nr_days` days in
    the database, providing the daily gross transaction value per product plus
    the overall total.

    Args:
        db_path (str): The path to the database.
        nr_days (int): Size of the reporting window in days. Defaults to 30.
    """

    def __init__(self, db_path: str, nr_days: int = 30):
        self.today = datetime.date.today()
        today_str = self.today.strftime("%Y-%m-%d")
        # Bug fix: nr_days was accepted but ignored — the window was hard-coded
        # to 30 days in generate_sql_string(). Store it so it is actually used.
        self.nr_days = nr_days
        report_name = f"management_summary_report_{today_str}"
        super().__init__(db_path, report_name)

    def generate_sql_string(self) -> str:
        """
        Generates the SQL string for the management summary report.

        Returns:
            str: The SQL string for the management summary report.
        """
        # Discover the products present so the report adapts to the data.
        query = f"SELECT DISTINCT product FROM {self.transactions_table_name};"
        self.db_cursor.execute(query)
        result = self.db_cursor.fetchall()
        unique_products = [row[0] for row in result]

        # The window ends at the latest completion date in the database (not
        # today), so the report stays meaningful for historical data.
        query = f"SELECT MAX(completed_at) FROM {self.transactions_table_name};"
        self.db_cursor.execute(query)
        max_completion_date_result = self.db_cursor.fetchone()[0]
        # NOTE(review): assumes completed_at is serialized as
        # '%Y-%m-%d %H:%M:%S' — confirm the writer never stores fractional seconds.
        max_completion_date_result_dt = datetime.datetime.strptime(max_completion_date_result, "%Y-%m-%d %H:%M:%S")
        start_date = max_completion_date_result_dt - datetime.timedelta(days=self.nr_days)
        end_date = max_completion_date_result_dt

        # One gross-value column per product.
        sum_columns_string = ",\n".join([
            f"SUM(CASE WHEN t.product = '{product_name}' THEN t.amount ELSE 0 END) AS {product_name}_Gross_Transaction_Value"
            for product_name in unique_products])

        sql_string = f"""
        SELECT
            DATE(t.completed_at) AS Date,
            {sum_columns_string},
            SUM(t.amount) AS Total_Gross_Transaction_Value
        FROM
            {self.transactions_table_name} t
        WHERE
            t.completed_at >= '{start_date}' AND t.completed_at <= '{end_date}'
        GROUP BY
            Date;
        """
        return sql_string
372
+
373
def generate_finance_reports(start_date: str, end_date: str):
    """
    Generates finance reports for the given date range.

    Args:
        start_date (str): The start date for the date range (format: 'YYYY-MM-DD').
        end_date (str): The end date for the date range (format: 'YYYY-MM-DD').

    """
    # "{time}" is expanded by loguru into a per-run timestamp.
    lg.add("./logs/generate_reports_log_{time}.log")
    lg.info("Started generating finance reports...")

    # columns = ['id', 'created_at', 'product', 'amount', 'status', 'user_id', 'completed_at']
    DB_PATH = "./data/generated_txns.db"
    PRODUCTS_LIST = ["ProdA", "ProdB"]
    lg.debug(f"DB_PATH: {DB_PATH}")
    lg.debug(f"PRODUCTS_LIST: {PRODUCTS_LIST}")

    # Per-product summary reports.
    lg.info("Generating summary reports...")
    for product in PRODUCTS_LIST:
        summary = ProductSummaryReport(product, DB_PATH, start_date, end_date)
        summary()

    # Per-product exception reports, in the same order as before.
    lg.info("Generating exception reports...")
    exception_report_classes = (
        ExceptionReportMissingRecon,
        ExceptionReportMissingTransactions,
        ExceptionReportAmountMismatch,
    )
    for product in PRODUCTS_LIST:
        for report_cls in exception_report_classes:
            report_cls(product, DB_PATH, start_date, end_date)()

    # One overall management summary across all products.
    lg.info("Generating management summary report...")
    ManagementSummaryReport(DB_PATH)()


if __name__ == '__main__':
    default_start_date = "2021-02-01"
    default_end_date = "2021-05-31"
    generate_finance_reports(default_start_date, default_end_date)
main.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This is the main file for the FFI Reports Dashboard.
3
+ """
4
+
5
+ from ffi_reports import dashboard
6
+
7
+ if __name__ == '__main__':
8
+ dashboard.run_app()
pytest.ini ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [pytest]
2
+ testpaths = tests
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ loguru==0.7.0
2
+ pandas==2.0.1
3
+ sqlparse==0.4.4
4
+ matplotlib==3.7.1
5
+ streamlit==1.22.0
6
+ plotly==5.14.1
7
+ pytest==7.3.1
tests/__init__.py ADDED
File without changes
tests/test_generate_reports.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This module contains tests for the generate_reports module.
3
+ Only one TestClass has been implemented for illustration purposes.
4
+ """
5
+
6
+ import pytest
7
+ import pandas as pd
8
+ from datetime import datetime
9
+ from ffi_reports.generate_reports import ProductSummaryReport
10
+
11
+
12
class TestProductSummaryReport:
    """Smoke tests for ProductSummaryReport over a fixed date window."""

    DEFAULT_DB_PATH = "./data/generated_txns.db"

    @pytest.fixture(scope="class")
    def summary_report(self):
        # Build and run one report per test class; all tests share the result.
        report = ProductSummaryReport(
            "ProdA",
            self.DEFAULT_DB_PATH,
            start_date="2023-03-01",
            end_date="2023-05-31",
        )
        report.connect()
        report.generate()
        return report

    @staticmethod
    def _as_date(value):
        # Report dates are ISO-formatted strings; compare as date objects.
        return datetime.strptime(value, "%Y-%m-%d").date()

    def test_minimum_date_above_start_date(self, summary_report):
        earliest = summary_report.df["Date"].min()
        assert self._as_date(earliest) >= self._as_date(summary_report.start_date)

    def test_maximum_date_below_end_date(self, summary_report):
        latest = summary_report.df["Date"].max()
        assert self._as_date(latest) <= self._as_date(summary_report.end_date)

    def test_summary_report_generation(self, summary_report):
        frame = summary_report.df
        assert isinstance(frame, pd.DataFrame)
        assert len(frame) > 0
        expected_columns = [
            "Date",
            "Total_Type0_Amount",
            "Total_Type1_Amount",
            "Total_Amount",
            "Total_TP_Amount",
        ]
        assert all(column in frame.columns for column in expected_columns)