Andi5986 committed on
Commit
aa8c619
·
1 Parent(s): 16429b0

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +96 -0
  2. data_processing.py +67 -0
  3. llm_agent.py +18 -0
  4. utils.py +44 -0
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+ from data_processing import process_dataframe, process_journal, remove_na_accounts
4
+ from utils import get_table_download_link, to_excel
5
+ from io import BytesIO
6
+ from llm_agent import set_openai_key, init_agent, get_agent_response
7
+
8
st.title('Accounting Fast Close')

# The OpenAI key is typed into a password-type sidebar field; whenever a
# value is present it is handed to set_openai_key.
openai_api_key = st.sidebar.text_input(label='Enter your OpenAI API Key', type='password')
if openai_api_key:
    set_openai_key(openai_api_key)

# Select box that switches the page between the two available views.
view_option = st.selectbox(label='Choose View', options=['Uploaded Documents', 'Findings'])
17
+
18
@st.cache_data
def load_excel_data(uploaded_file):
    """Read an uploaded Excel file into a DataFrame.

    The function is wrapped in ``st.cache_data``.
    """
    return pd.read_excel(uploaded_file)
22
+
23
# Sidebar uploaders for the two workbooks that get reconciled.
uploaded_file1 = st.sidebar.file_uploader('Upload your trial balance Excel file', type=['xlsx'])
uploaded_file2 = st.sidebar.file_uploader('Upload your journal entry Excel file', type=['xlsx'])

# Start from "nothing loaded" so every later section can test for None
# instead of raising NameError when a file or the API key is missing.
df1 = None
df2 = None
agent = None


def _render_combined_download(trial_balance, journal):
    """Render a link that downloads both frames as one two-sheet workbook."""
    excel_data_combined = BytesIO()
    with pd.ExcelWriter(excel_data_combined, engine='xlsxwriter') as writer:
        trial_balance.to_excel(writer, sheet_name='Trial Balance', index=False)
        journal.to_excel(writer, sheet_name='Journal Entry', index=False)

    st.markdown(
        get_table_download_link(excel_data_combined.getvalue(), filename='combined.xlsx'),
        unsafe_allow_html=True,
    )


if uploaded_file1 is not None:
    # Load and clean the trial balance.
    df1 = process_dataframe(load_excel_data(uploaded_file1))

    # The agent is only created once there is both data and an API key.
    if openai_api_key:
        agent = init_agent(openai_api_key)

if uploaded_file2 is not None:
    # Load the journal and aggregate it per account.
    df2 = process_journal(load_excel_data(uploaded_file2))

if df1 is not None and df2 is not None:
    # Merge df1 (trial balance) with df2 (journal entries)
    df1 = pd.merge(df1, df2, on='Account', how='outer')

    # Remove rows with 'Account' as NA
    df1 = remove_na_accounts(df1)

    # Columns whose NaN values (accounts present on one side only) become 0.
    fillna_columns = ['Opening Balance Debit', 'Opening Balance Credit',
                      'Current Transactions Debit', 'Current Transactions Credit',
                      'Closing Balance Debit', 'Closing Balance Credit',
                      'Debit Amount', 'Credit Amount']

    # rename_columns leaves the headers untouched unless the sheet has 8 or
    # 10 columns, so report an unexpected layout instead of a raw KeyError.
    missing_columns = [name for name in fillna_columns if name not in df1.columns]
    if missing_columns:
        st.error(f'Trial balance is missing expected columns: {missing_columns}')
        st.stop()

    df1[fillna_columns] = df1[fillna_columns].fillna(0)

    # Difference between the trial balance movements and the journal totals.
    df1['Diff Dr.'] = df1['Current Transactions Debit'] - df1['Debit Amount']
    df1['Diff Cr.'] = df1['Current Transactions Credit'] - df1['Credit Amount']

# Uploaded Documents Section
if view_option == 'Uploaded Documents':
    if df1 is not None:
        st.write(df1)

    # The combined workbook needs both frames.
    if df1 is not None and df2 is not None:
        _render_combined_download(df1, df2)

elif view_option == 'Findings':
    # Logic for findings should be implemented here
    if df1 is None:
        st.info('Upload a trial balance file to see findings.')
    else:
        st.write(df1)

        # AI Agent Section: agent is set only when a key and data both exist.
        if agent is not None:
            user_input = st.text_input('Ask a question:')
            if user_input:
                st.write(get_agent_response(agent, df1, user_input))

        if df2 is not None:
            _render_combined_download(df1, df2)
96
+
data_processing.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from typing import List
3
+
4
def remove_nulls(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
    """Keep only the rows whose value in every listed column starts with a digit.

    Args:
        df: Frame to filter; it is not modified.
        columns: Column labels to check. Any iterable of labels works, and an
            empty one returns ``df`` unchanged.

    Returns:
        The filtered frame.

    Raises:
        KeyError: If a label in ``columns`` is not a column of ``df``.
    """
    for column in columns:
        values = df[column]
        # Slice with [:1] rather than index with [0]: an empty string then
        # gives "" (isdigit -> False) instead of NaN inside the boolean mask.
        starts_with_digit = values.astype(str).str[:1].str.isdigit()
        df = df[values.notna() & starts_with_digit]
    return df
8
+
9
def remove_na_accounts(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` without the rows whose 'Account' value is missing."""
    has_account = df['Account'].notna()
    return df[has_account]
12
+
13
def remove_empty_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` without the columns in which every value is missing."""
    return df.dropna(axis='columns', how='all')
16
+
17
def handle_unknown_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Drop entries that do not start with a digit from object/int64 columns.

    Each ``object`` or ``int64`` column is reduced to the entries whose string
    form begins with a digit; columns of any other dtype are passed through
    unchanged. ``DataFrame.apply`` then re-aligns the columns on the index, so
    an entry removed from one column shows up as NaN wherever another column
    still has that row.

    Args:
        df: Frame to clean; it is not modified.

    Returns:
        A new frame with the non-digit-led entries removed.
    """
    # NOTE(review): a negative integer starts with '-', so it is removed as
    # well -- confirm that this is intended.

    def keep_digit_led(column: pd.Series) -> pd.Series:
        if column.dtype not in ['object', 'int64']:
            return column
        # [:1] instead of [0]: indexing an empty string yields NaN, and a
        # mask containing NaN cannot be used for boolean indexing.
        starts_with_digit = column.astype(str).str[:1].str.isdigit()
        return column[starts_with_digit]

    return df.apply(keep_digit_led)
20
+
21
def rename_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Assign the standard trial-balance headers when the column count matches.

    A frame with 10 columns gets the full header set, one with 8 columns gets
    the same set without the two 'Total Transactions' columns, and any other
    width is left as it is. The headers are assigned on ``df`` itself, which
    is also the return value.
    """
    leading = ['Account', 'Description', 'Opening Balance Debit', 'Opening Balance Credit',
               'Current Transactions Debit', 'Current Transactions Credit']
    totals = ['Total Transactions Debit', 'Total Transactions Credit']
    closing = ['Closing Balance Debit', 'Closing Balance Credit']

    # Header layout keyed by the number of columns it applies to.
    layouts = {10: leading + totals + closing, 8: leading + closing}

    headers = layouts.get(len(df.columns))
    if headers is not None:
        df.columns = headers
    return df
32
+
33
def convert_to_float(df: pd.DataFrame, skip_columns: List[str]) -> pd.DataFrame:
    """Convert every column not named in ``skip_columns`` to float.

    Values are turned into strings and stripped of ',' before the float
    conversion; skipped columns are passed through unchanged.
    """

    def as_float(column):
        if column.name in skip_columns:
            return column
        without_commas = column.astype(str).str.replace(',', '')
        return without_commas.astype(float)

    return df.apply(as_float)
36
+
37
def process_dataframe(df: pd.DataFrame, *args) -> pd.DataFrame:
    """Run the trial-balance cleaning steps on ``df`` and return the result.

    The steps run in this order: ``remove_nulls`` (with ``args`` as the
    columns to check), ``remove_empty_columns``, ``handle_unknown_columns``,
    ``rename_columns`` and ``convert_to_float``, which skips 'Account' and
    'Description'.
    """
    return (
        df.pipe(remove_nulls, args)
        .pipe(remove_empty_columns)
        .pipe(handle_unknown_columns)
        .pipe(rename_columns)
        .pipe(convert_to_float, ['Account', 'Description'])
    )
44
+
45
+
46
def rename_columns_je(df):
    """Rename the journal-entry headers on ``df`` itself and return it.

    'Cont debitor' becomes 'Account Debit', 'Cont creditor' becomes
    'Account Credit' and 'Suma' becomes 'Amount'; other columns keep
    their names.
    """
    df.rename(
        columns={'Cont debitor': 'Account Debit', 'Cont creditor': 'Account Credit', 'Suma': 'Amount'},
        inplace=True,
    )
    return df
55
+
56
def process_journal(df: pd.DataFrame) -> pd.DataFrame:
    """Total the journal amounts per account on the debit and the credit side.

    The headers are first renamed via ``rename_columns_je``. The result has
    one row per account with the columns 'Account', 'Debit Amount' and
    'Credit Amount'; an account that appears on one side only gets 0 on the
    other.
    """
    journal = rename_columns_je(df)

    def totals_by(account_column, total_name):
        # Sum 'Amount' per account and give both columns their final names.
        summed = journal.groupby(account_column).agg({'Amount': 'sum'}).reset_index()
        return summed.rename(columns={'Amount': total_name, account_column: 'Account'})

    debit_totals = totals_by('Account Debit', 'Debit Amount')
    credit_totals = totals_by('Account Credit', 'Credit Amount')

    combined = pd.merge(debit_totals, credit_totals, on='Account', how='outer')
    combined.fillna(0, inplace=True)
    return combined
67
+
llm_agent.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ from pandasai import PandasAI
4
+ from pandasai.llm.openai import OpenAI
5
+
6
def set_openai_key(api_key):
    """Store ``api_key`` in the OPENAI_API_KEY environment variable and return it."""
    os.environ.update({"OPENAI_API_KEY": api_key})
    return api_key
9
+
10
def init_agent(api_key):
    """Create a PandasAI agent on top of an OpenAI LLM that uses ``api_key``.

    The key is also stored in the environment through ``set_openai_key``.
    """
    token = set_openai_key(api_key)
    return PandasAI(OpenAI(api_token=token), conversational=False)
15
+
16
def get_agent_response(agent, df, user_input):
    """Return whatever ``agent.run`` yields for ``df`` and ``user_input``."""
    return agent.run(df, user_input)
utils.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import pandas as pd
3
+ from io import BytesIO
4
+ import os
5
+
6
def get_table_download_link(excel_data: bytes, filename: str = 'data.xlsx') -> str:
    """Build an HTML anchor that downloads ``excel_data`` as ``filename``.

    The bytes are base64-encoded and embedded in the link as a ``data:`` URI.

    Args:
        excel_data: Raw file content to offer for download.
        filename: Name placed in the anchor's ``download`` attribute.

    Returns:
        The ``<a>`` element as a string.
    """
    import html  # local import: only needed for the attribute escaping below

    b64 = base64.b64encode(excel_data).decode()  # bytes -> base64 text
    # Quote and escape the attribute value so a filename containing spaces,
    # quotes or angle brackets cannot break or inject markup.
    safe_name = html.escape(filename, quote=True)
    return (
        f'<a href="data:application/octet-stream;base64,{b64}" '
        f'download="{safe_name}">Download Excel File</a>'
    )
14
+
15
def convert_and_save_as_csv(uploaded_file) -> str:
    """Save an uploaded Excel file as CSV and return the CSV path.

    The CSV path is ``uploaded_file.name`` with its extension replaced by
    '.csv'. ``None`` is returned, after printing a message, when no file is
    given or when reading or writing raises.
    """
    if uploaded_file is None:
        print("No file uploaded.")
        return None

    try:
        frame = pd.read_excel(uploaded_file)
        stem, _extension = os.path.splitext(uploaded_file.name)
        csv_file_path = stem + '.csv'
        frame.to_csv(csv_file_path, index=False)
        return csv_file_path
    except Exception as e:
        print("Error: ", e)
        return None
31
+
32
def to_excel(df):
    """Write ``df`` to an in-memory .xlsx workbook and return its bytes.

    The frame goes to a sheet named 'Sheet1' without the index, using the
    openpyxl engine.
    """
    buffer = BytesIO()
    with pd.ExcelWriter(buffer, engine='openpyxl') as workbook:
        df.to_excel(excel_writer=workbook, sheet_name='Sheet1', index=False)
    # Read the buffer only after the writer has been closed.
    return buffer.getvalue()
37
+
38
def load_data(file_path: str) -> pd.DataFrame:
    """Read the CSV at ``file_path`` into a DataFrame.

    When the file does not exist, a message is printed and ``None`` is
    returned.
    """
    try:
        return pd.read_csv(file_path)
    except FileNotFoundError as e:
        print(f"File not found: {e}")
    return None