Spaces:
Build error
Build error
Upload 4 files (#1)
Browse files- Upload 4 files (aa8c619e0bbc53b384a10e20d4edeae9269329d1)
- app.py +96 -0
- data_processing.py +67 -0
- llm_agent.py +18 -0
- utils.py +44 -0
app.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import streamlit as st
|
3 |
+
from data_processing import process_dataframe, process_journal, remove_na_accounts
|
4 |
+
from utils import get_table_download_link, to_excel
|
5 |
+
from io import BytesIO
|
6 |
+
from llm_agent import set_openai_key, init_agent, get_agent_response
|
7 |
+
|
8 |
+
st.title('Accounting Fast Close')

# Ask user for OpenAI API key
openai_api_key = st.sidebar.text_input('Enter your OpenAI API Key', type='password')
if openai_api_key:
    set_openai_key(openai_api_key)

# Selector to toggle between the uploaded documents and the findings view
view_option = st.selectbox('Choose View', ['Uploaded Documents', 'Findings'])


@st.cache_data
def load_excel_data(uploaded_file):
    """Read an uploaded Excel workbook into a DataFrame (wrapped in st.cache_data)."""
    return pd.read_excel(uploaded_file)


def render_combined_download(trial_balance, journal):
    """Write both frames into one workbook and render a download link for it."""
    excel_data_combined = BytesIO()
    with pd.ExcelWriter(excel_data_combined, engine='xlsxwriter') as writer:
        trial_balance.to_excel(writer, sheet_name='Trial Balance', index=False)
        journal.to_excel(writer, sheet_name='Journal Entry', index=False)

    st.markdown(
        get_table_download_link(excel_data_combined.getvalue(), filename='combined.xlsx'),
        unsafe_allow_html=True,
    )


uploaded_file1 = st.sidebar.file_uploader('Upload your trial balance Excel file', type=['xlsx'])
uploaded_file2 = st.sidebar.file_uploader('Upload your journal entry Excel file', type=['xlsx'])

# Start from None so the view sections below can test for availability
# instead of raising NameError when a file (or the API key) is missing.
df1 = None
df2 = None
agent = None

if uploaded_file1 is not None:
    df1 = process_dataframe(load_excel_data(uploaded_file1))

    # AI Agent Section
    if openai_api_key:
        agent = init_agent(openai_api_key)

if uploaded_file2 is not None:
    df2 = process_journal(load_excel_data(uploaded_file2))

if df1 is not None and df2 is not None:
    # Merge df1 (trial balance) with df2 (journal entries)
    df1 = pd.merge(df1, df2, on='Account', how='outer')

    # Remove rows with 'Account' as NA
    df1 = remove_na_accounts(df1)

    # Columns whose NaN values (introduced by the outer merge) become 0
    fillna_columns = ['Opening Balance Debit', 'Opening Balance Credit',
                      'Current Transactions Debit', 'Current Transactions Credit',
                      'Closing Balance Debit', 'Closing Balance Credit',
                      'Debit Amount', 'Credit Amount']
    df1[fillna_columns] = df1[fillna_columns].fillna(0)

    # Difference between the trial balance movements and the journal totals
    df1['Diff Dr.'] = df1['Current Transactions Debit'] - df1['Debit Amount']
    df1['Diff Cr.'] = df1['Current Transactions Credit'] - df1['Credit Amount']

# NOTE(review): the original indentation was lost; the chat box and the export
# link are assumed to belong inside the view branches below - confirm.
if view_option == 'Uploaded Documents':
    if df1 is not None:
        st.write(df1)

    # The combined workbook needs both frames, not just the journal.
    if df1 is not None and df2 is not None:
        render_combined_download(df1, df2)

elif view_option == 'Findings':
    # Logic for findings should be implemented here
    if df1 is None:
        st.info('Upload a trial balance file to see findings.')
    else:
        st.write(df1)

        # AI Agent Section
        if agent is not None:
            # Create a chat box for user questions
            user_input = st.text_input('Ask a question:')
            if user_input:
                response = get_agent_response(agent, df1, user_input)
                st.write(response)

        if df2 is not None:
            render_combined_download(df1, df2)
|
96 |
+
|
data_processing.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from typing import List
|
3 |
+
|
4 |
+
def remove_nulls(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
    """Keep rows whose value in each listed column is non-null and digit-led.

    The filters are applied one column after another, so a row survives only
    if it passes the check for every column in ``columns``. With no columns
    the input frame is returned as is.
    """
    filtered = df
    for name in columns:
        values = filtered[name]
        is_present = values.notnull()
        starts_with_digit = values.astype(str).str[0].str.isdigit()
        filtered = filtered[is_present & starts_with_digit]
    return filtered
|
8 |
+
|
9 |
+
def remove_na_accounts(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` without the rows whose 'Account' value is missing."""
    return df.dropna(subset=['Account'])
|
12 |
+
|
13 |
+
def remove_empty_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` without the columns in which every value is missing."""
    return df.dropna(axis=1, how='all')
|
16 |
+
|
17 |
+
def handle_unknown_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Blank out values that do not start with a digit in object/int64 columns.

    Each column whose dtype is 'object' or 'int64' is reduced to the entries
    whose string form starts with a digit; columns of any other dtype pass
    through untouched. ``DataFrame.apply`` reassembles the per-column results,
    so an entry removed from one column shows up as missing when another
    column still holds a value at that index.

    Args:
        df: Frame to clean.

    Returns:
        A new frame with the filtered columns.
    """
    def keep_digit_led(column: pd.Series) -> pd.Series:
        if column.dtype not in ['object', 'int64']:
            return column
        # Slice with [:1] rather than index with [0]: indexing an empty string
        # yields NaN, and a mask containing NaN makes the boolean selection
        # below raise ValueError. The slice gives '' instead, which is simply
        # "not a digit".
        starts_with_digit = column.astype(str).str[:1].str.isdigit()
        return column[starts_with_digit]

    return df.apply(keep_digit_led)
|
20 |
+
|
21 |
+
def rename_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Assign the standard trial-balance headers based on the column count.

    Frames with 10 columns get the layout that includes the
    'Total Transactions' pair; frames with 8 columns get the layout without
    it. Any other column count is left untouched. The headers are assigned on
    ``df`` itself, and that same object is returned.
    """
    leading = ['Account', 'Description', 'Opening Balance Debit', 'Opening Balance Credit',
               'Current Transactions Debit', 'Current Transactions Credit']
    totals = ['Total Transactions Debit', 'Total Transactions Credit']
    closing = ['Closing Balance Debit', 'Closing Balance Credit']

    headers_by_width = {
        10: leading + totals + closing,
        8: leading + closing,
    }

    headers = headers_by_width.get(len(df.columns))
    if headers is not None:
        df.columns = headers
    return df
|
32 |
+
|
33 |
+
def convert_to_float(df: pd.DataFrame, skip_columns: List[str]) -> pd.DataFrame:
    """Convert every column not named in ``skip_columns`` to float.

    Values are stringified and stripped of ',' before the conversion.
    Skipped columns are passed through unchanged.
    """
    def to_number(column: pd.Series) -> pd.Series:
        if column.name in skip_columns:
            return column
        without_commas = column.astype(str).str.replace(',', '')
        return without_commas.astype(float)

    return df.apply(to_number)
|
36 |
+
|
37 |
+
def process_dataframe(df: pd.DataFrame, *args) -> pd.DataFrame:
    """Run the trial-balance cleaning pipeline and return the cleaned frame.

    ``args`` are column names handed to ``remove_nulls``. The remaining steps
    run in a fixed order: drop empty columns, filter non-digit-led values,
    assign the standard headers, then convert everything except 'Account'
    and 'Description' to float.
    """
    cleaned = remove_nulls(df, args)
    for step in (remove_empty_columns, handle_unknown_columns, rename_columns):
        cleaned = step(cleaned)
    return convert_to_float(cleaned, ['Account', 'Description'])
|
44 |
+
|
45 |
+
|
46 |
+
def rename_columns_je(df):
    """Rename the journal-entry headers in place and return the same frame.

    'Cont debitor' -> 'Account Debit', 'Cont creditor' -> 'Account Credit',
    'Suma' -> 'Amount'. Columns with other names are left alone.
    """
    df.rename(
        columns={
            'Cont debitor': 'Account Debit',
            'Cont creditor': 'Account Credit',
            'Suma': 'Amount',
        },
        inplace=True,
    )
    return df
|
55 |
+
|
56 |
+
def process_journal(df: pd.DataFrame) -> pd.DataFrame:
    """Summarise journal entries into per-account debit and credit totals.

    After the headers are renamed, 'Amount' is summed once per
    'Account Debit' and once per 'Account Credit'. The two summaries are
    outer-merged on 'Account', and missing totals are filled with 0.
    """
    df = rename_columns_je(df)

    def totals_by(side_column: str, total_name: str) -> pd.DataFrame:
        sums = df.groupby(side_column)['Amount'].sum().reset_index()
        return sums.rename(columns={'Amount': total_name, side_column: 'Account'})

    debit_totals = totals_by('Account Debit', 'Debit Amount')
    credit_totals = totals_by('Account Credit', 'Credit Amount')

    combined = pd.merge(debit_totals, credit_totals, on='Account', how='outer')
    return combined.fillna(0)
|
67 |
+
|
llm_agent.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pandas as pd
|
3 |
+
from pandasai import PandasAI
|
4 |
+
from pandasai.llm.openai import OpenAI
|
5 |
+
|
6 |
+
def set_openai_key(api_key):
    """Store ``api_key`` in the OPENAI_API_KEY environment variable and return it."""
    os.environ.update({"OPENAI_API_KEY": api_key})
    return api_key
|
9 |
+
|
10 |
+
def init_agent(api_key):
    """Create a PandasAI agent backed by an OpenAI LLM using ``api_key``.

    The key is also written to the environment via ``set_openai_key`` before
    the LLM wrapper is built.
    """
    llm = OpenAI(api_token=set_openai_key(api_key))
    return PandasAI(llm, conversational=False)
|
15 |
+
|
16 |
+
def get_agent_response(agent, df, user_input):
    """Return whatever ``agent.run`` produces for ``df`` and the user's question."""
    return agent.run(df, user_input)
|
utils.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
import pandas as pd
|
3 |
+
from io import BytesIO
|
4 |
+
import os
|
5 |
+
|
6 |
+
def get_table_download_link(excel_data: bytes, filename: str = 'data.xlsx') -> str:
    """Build an HTML anchor that downloads the given workbook bytes.

    Args:
        excel_data: Raw file content to embed in the link as a base64 data URI.
        filename: Name offered to the browser for the downloaded file.

    Returns:
        The ``<a>`` element as a string.
    """
    # Imported here so the block carries its own dependency.
    from html import escape

    b64 = base64.b64encode(excel_data).decode()  # bytes -> base64 text for the data URI
    # Quote and escape the attribute value: unquoted, a name containing a
    # space or a quote character would break the markup.
    safe_name = escape(filename, quote=True)
    return (
        f'<a href="data:application/octet-stream;base64,{b64}" '
        f'download="{safe_name}">Download Excel File</a>'
    )
|
14 |
+
|
15 |
+
def convert_and_save_as_csv(uploaded_file) -> str:
    """Read an uploaded Excel file and save it as a CSV in the working directory.

    Args:
        uploaded_file: Object readable by ``pd.read_excel`` that also exposes a
            ``name`` attribute, or None.

    Returns:
        The path of the written CSV file. None is returned (after printing a
        message) when no file was given or when reading/writing fails.
    """
    if uploaded_file is None:
        print("No file uploaded.")
        return None

    try:
        # Read the file with pandas
        df = pd.read_excel(uploaded_file)
        # Use only the base name: the uploaded name is caller-supplied, and any
        # directory components in it would otherwise redirect the write.
        stem = os.path.splitext(os.path.basename(uploaded_file.name))[0]
        csv_file_path = stem + '.csv'
        df.to_csv(csv_file_path, index=False)
        return csv_file_path
    except Exception as e:
        # Best-effort conversion: report the problem and signal failure.
        print("Error: ", e)
        return None
|
31 |
+
|
32 |
+
def to_excel(df):
    """Serialise ``df`` to an in-memory workbook (sheet 'Sheet1') and return its bytes."""
    buffer = BytesIO()
    with pd.ExcelWriter(buffer, engine='openpyxl') as workbook:
        df.to_excel(workbook, sheet_name='Sheet1', index=False)
    return buffer.getvalue()
|
37 |
+
|
38 |
+
def load_data(file_path: str) -> pd.DataFrame:
    """Load a CSV file into a DataFrame.

    Returns None, after printing a message, when the file does not exist.
    """
    try:
        frame = pd.read_csv(file_path)
    except FileNotFoundError as e:
        print(f"File not found: {e}")
        return None
    return frame
|