Spaces:
Sleeping
Sleeping
File size: 6,996 Bytes
39e62fd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
import os
import streamlit as st
import pandas as pd
import numpy as np
from io import BytesIO
import tempfile
# Optional: PDF extraction if needed
try:
import pdfplumber
except ImportError:
pdfplumber = None
# FAISS for potential vector similarity (for future enhancement)
import faiss
# Groq API for LLM integration
from groq import Groq
# -------------------------------
# Initialize Groq Client
# -------------------------------
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# -------------------------------
# Utility Functions
# -------------------------------
def load_ledger(file):
"""
Load ledger from CSV, JSON, or PDF.
"""
file_ext = os.path.splitext(file.name)[1].lower()
if file_ext == ".csv":
df = pd.read_csv(file)
elif file_ext == ".json":
df = pd.read_json(file)
elif file_ext == ".pdf":
if pdfplumber is None:
st.error("Please install pdfplumber to process PDF files.")
return None
with pdfplumber.open(file) as pdf:
page = pdf.pages[0] # Assumes table on first page
table = page.extract_table()
df = pd.DataFrame(table[1:], columns=table[0])
else:
st.error("Unsupported file type!")
return None
return df
def preprocess_ledger(df):
"""
Standardize date format and convert credit/debit to float.
"""
df['date'] = pd.to_datetime(df['date'], errors='coerce')
df['credit'] = pd.to_numeric(df['credit'], errors='coerce').fillna(0.0)
df['debit'] = pd.to_numeric(df['debit'], errors='coerce').fillna(0.0)
return df
def generate_suggestion(row):
"""
Generate a reconciliation suggestion using Groq API.
"""
prompt = (
f"Ledger entry mismatch detected.\n"
f"- Date: {row['date'].date() if pd.notnull(row['date']) else 'Unknown'}\n"
f"- Credit: {row['credit']}\n"
f"- Debit: {row['debit']}\n\n"
"Please provide reconciliation suggestions in simple bullet points."
)
try:
response = client.chat.completions.create(
messages=[{"role": "user", "content": prompt}],
model="llama-3.3-70b-versatile",
stream=False,
)
suggestion = response.choices[0].message.content
except Exception as e:
suggestion = f"Error generating suggestion: {e}"
return suggestion
def compare_ledgers(df_a, df_b):
"""
Compare two ledger DataFrames row-by-row based on date, credit, and debit.
"""
results = []
df_b_copy = df_b.copy()
# Compare each entry in Ledger A with Ledger B
for idx, row in df_a.iterrows():
# Match based on same date and nearly identical credit & debit amounts.
match = df_b_copy[
(df_b_copy['date'] == row['date']) &
(np.isclose(df_b_copy['credit'], row['credit'])) &
(np.isclose(df_b_copy['debit'], row['debit']))
]
if not match.empty:
status = "β
Matched"
suggestion = ""
# Remove matched entry to prevent duplicate matching.
df_b_copy = df_b_copy.drop(match.index[0])
else:
status = "β Mismatch"
suggestion = generate_suggestion(row)
results.append({
"date": row['date'],
"credit": row['credit'],
"debit": row['debit'],
"description": row.get("description", ""),
"status": status,
"suggestion": suggestion
})
# Any remaining entries in Ledger B are extra entries.
for idx, row in df_b_copy.iterrows():
results.append({
"date": row['date'],
"credit": row['credit'],
"debit": row['debit'],
"description": row.get("description", ""),
"status": "β Mismatch (Extra in Ledger B)",
"suggestion": "Review extra entry in Ledger B."
})
result_df = pd.DataFrame(results)
return result_df
def calculate_totals(df_a, df_b):
"""
Calculate totals and differences for credits and debits.
"""
totals = {
"ledger_a_credit": df_a['credit'].sum(),
"ledger_a_debit": df_a['debit'].sum(),
"ledger_b_credit": df_b['credit'].sum(),
"ledger_b_debit": df_b['debit'].sum(),
"credit_difference": df_a['credit'].sum() - df_b['credit'].sum(),
"debit_difference": df_a['debit'].sum() - df_b['debit'].sum(),
}
return totals
def generate_excel_report(df):
"""
Generate an Excel report from the reconciliation DataFrame.
"""
output = BytesIO()
with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
df.to_excel(writer, index=False, sheet_name="Reconciliation")
processed_data = output.getvalue()
return processed_data
# -------------------------------
# Streamlit User Interface
# -------------------------------
def main():
st.title("π Finance Ledger Reconciliation App")
st.markdown("Upload the ledger files to compare two opposite party records and get reconciliation suggestions.")
col1, col2 = st.columns(2)
with col1:
ledger_a_file = st.file_uploader("Upload Ledger A (CSV/JSON/PDF)", type=["csv", "json", "pdf"], key="ledger_a")
with col2:
ledger_b_file = st.file_uploader("Upload Ledger B (CSV/JSON/PDF)", type=["csv", "json", "pdf"], key="ledger_b")
if ledger_a_file and ledger_b_file:
df_a = load_ledger(ledger_a_file)
df_b = load_ledger(ledger_b_file)
if df_a is not None and df_b is not None:
st.subheader("Original Ledgers Preview")
st.markdown("**Ledger A:**")
st.write(df_a.head())
st.markdown("**Ledger B:**")
st.write(df_b.head())
# Preprocess the data
df_a = preprocess_ledger(df_a)
df_b = preprocess_ledger(df_b)
st.subheader("Processed Ledgers Preview")
st.markdown("**Ledger A:**")
st.write(df_a.head())
st.markdown("**Ledger B:**")
st.write(df_b.head())
# Compare ledgers and calculate differences
with st.spinner("Comparing ledgers..."):
result_df = compare_ledgers(df_a, df_b)
totals = calculate_totals(df_a, df_b)
st.subheader("Reconciliation Results")
st.write(result_df)
st.markdown("### Totals & Differences")
st.write(totals)
# Download report button (Excel file)
excel_data = generate_excel_report(result_df)
st.download_button(label="Download Report as Excel",
data=excel_data,
file_name="reconciliation_report.xlsx",
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
st.success("Reconciliation completed successfully!")
if __name__ == '__main__':
main() |