GB-GB / BG.py
Esben922's picture
Update BG.py
5d98658 verified
import streamlit as st
from io import BytesIO
from openpyxl import load_workbook, Workbook
from openpyxl.styles import NamedStyle
def process_excel(file, columns, original_file_name, sheet_name="Linjer", header_row=3):
logs = []
# Load the Excel file using openpyxl
try:
wb = load_workbook(file)
if sheet_name not in wb.sheetnames:
return {}, [f"Worksheet '{sheet_name}' not found. Please upload a file with the correct structure."]
ws = wb[sheet_name]
logs.append(f"Loaded worksheet '{sheet_name}'. Other worksheets were ignored.")
except Exception as e:
return {}, [f"An error occurred while reading the file: {e}"]
# Read metadata rows (first two rows)
metadata_rows = [[cell.value for cell in row] for row in ws.iter_rows(min_row=1, max_row=2)]
# Read headers from row 3
headers = [cell.value for cell in ws[header_row]]
# Ensure required columns exist
column_indexes = {header: i for i, header in enumerate(headers)}
for col in columns:
if col not in column_indexes:
return {}, [f"Column '{col}' not found in the worksheet. Please check the file."]
logs.append(f"Headers found: {headers}")
# Read all data rows starting from row 4
data_rows = []
for row in ws.iter_rows(min_row=header_row+1, values_only=True):
data_rows.append(list(row))
logs.append(f"Loaded {len(data_rows)} data rows.")
# Get unique values for splitting
unique_values = set()
for row in data_rows:
for col in columns:
idx = column_indexes[col]
if row[idx] is not None:
unique_values.add(row[idx])
logs.append(f"Total unique values across columns: {list(unique_values)}")
# Define the ISO-like datetime format (YYYY-MM-DDTHH:MM:SS)
iso_datetime_style = NamedStyle(name="iso_datetime", number_format="YYYY-MM-DDTHH:MM:SS")
split_files = {}
for value in unique_values:
# Filter rows based on value
split_data = [row for row in data_rows if row[column_indexes[columns[0]]] == value or row[column_indexes[columns[1]]] == value]
if not split_data:
logs.append(f"No rows found for value '{value}'. Skipping file creation.")
continue
# Create a new workbook for the split file
new_wb = Workbook()
new_ws = new_wb.active
new_ws.title = "Sheet1"
# Write metadata rows
for row in metadata_rows:
new_ws.append(row)
# Write headers
new_ws.append(headers)
# Write data rows with correct formatting
for row in split_data:
new_row = []
for i, cell in enumerate(row):
if headers[i] in ["Fragtbrevsdato", "Fakturadato", "Leveringstidspunkt"]:
if isinstance(cell, (int, float)): # Handle numeric dates (Excel stores dates as numbers)
new_row.append(cell)
new_ws.cell(row=new_ws.max_row+1, column=i+1).style = iso_datetime_style
else:
try:
new_row.append(cell.strftime("%Y-%m-%dT%H:%M:%S"))
except:
new_row.append(cell)
else:
new_row.append(cell)
new_ws.append(new_row)
# Save to BytesIO buffer
output = BytesIO()
new_wb.save(output)
output.seek(0)
sanitized_value = str(value).replace("/", "-").replace("\\", "-")
file_name = f"{original_file_name} - {sanitized_value}.xlsx"
split_files[file_name] = output
logs.append(f"File '{file_name}' created with {len(split_data)} rows (plus metadata and headers).")
return split_files, logs
st.title("BG File Splitter")
st.write("Upload an Excel file to split it based on unique values in specified columns.")
uploaded_file = st.file_uploader("Upload Excel File", type=["xlsx"])
if uploaded_file:
st.write("Processing the uploaded file...")
original_file_name = uploaded_file.name.rsplit(".", 1)[0]
columns_to_split = ['Afsenderkunde', 'Modtagerkunde']
split_files, logs = process_excel(uploaded_file, columns_to_split, original_file_name)
st.write("### Logs (Debugging Info)")
for log in logs:
st.text(log)
if split_files:
st.write("### Download Split Files")
for file_name, file_content in split_files.items():
st.download_button(
label=f"Download {file_name}",
data=file_content,
file_name=file_name,
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
)
else:
st.write("No files were created. Check the logs for details.")