|
import streamlit as st |
|
from io import BytesIO |
|
from openpyxl import load_workbook, Workbook |
|
from openpyxl.styles import NamedStyle |
|
|
|
def process_excel(file, columns, original_file_name, sheet_name="Linjer", header_row=3): |
|
logs = [] |
|
|
|
|
|
try: |
|
wb = load_workbook(file) |
|
if sheet_name not in wb.sheetnames: |
|
return {}, [f"Worksheet '{sheet_name}' not found. Please upload a file with the correct structure."] |
|
ws = wb[sheet_name] |
|
logs.append(f"Loaded worksheet '{sheet_name}'. Other worksheets were ignored.") |
|
except Exception as e: |
|
return {}, [f"An error occurred while reading the file: {e}"] |
|
|
|
|
|
metadata_rows = [[cell.value for cell in row] for row in ws.iter_rows(min_row=1, max_row=2)] |
|
|
|
|
|
headers = [cell.value for cell in ws[header_row]] |
|
|
|
|
|
column_indexes = {header: i for i, header in enumerate(headers)} |
|
for col in columns: |
|
if col not in column_indexes: |
|
return {}, [f"Column '{col}' not found in the worksheet. Please check the file."] |
|
|
|
logs.append(f"Headers found: {headers}") |
|
|
|
|
|
data_rows = [] |
|
for row in ws.iter_rows(min_row=header_row+1, values_only=True): |
|
data_rows.append(list(row)) |
|
|
|
logs.append(f"Loaded {len(data_rows)} data rows.") |
|
|
|
|
|
unique_values = set() |
|
for row in data_rows: |
|
for col in columns: |
|
idx = column_indexes[col] |
|
if row[idx] is not None: |
|
unique_values.add(row[idx]) |
|
|
|
logs.append(f"Total unique values across columns: {list(unique_values)}") |
|
|
|
|
|
iso_datetime_style = NamedStyle(name="iso_datetime", number_format="YYYY-MM-DDTHH:MM:SS") |
|
|
|
split_files = {} |
|
|
|
for value in unique_values: |
|
|
|
split_data = [row for row in data_rows if row[column_indexes[columns[0]]] == value or row[column_indexes[columns[1]]] == value] |
|
|
|
if not split_data: |
|
logs.append(f"No rows found for value '{value}'. Skipping file creation.") |
|
continue |
|
|
|
|
|
new_wb = Workbook() |
|
new_ws = new_wb.active |
|
new_ws.title = "Sheet1" |
|
|
|
|
|
for row in metadata_rows: |
|
new_ws.append(row) |
|
|
|
|
|
new_ws.append(headers) |
|
|
|
|
|
for row in split_data: |
|
new_row = [] |
|
for i, cell in enumerate(row): |
|
if headers[i] in ["Fragtbrevsdato", "Fakturadato", "Leveringstidspunkt"]: |
|
if isinstance(cell, (int, float)): |
|
new_row.append(cell) |
|
new_ws.cell(row=new_ws.max_row+1, column=i+1).style = iso_datetime_style |
|
else: |
|
try: |
|
new_row.append(cell.strftime("%Y-%m-%dT%H:%M:%S")) |
|
except: |
|
new_row.append(cell) |
|
else: |
|
new_row.append(cell) |
|
new_ws.append(new_row) |
|
|
|
|
|
output = BytesIO() |
|
new_wb.save(output) |
|
output.seek(0) |
|
|
|
sanitized_value = str(value).replace("/", "-").replace("\\", "-") |
|
file_name = f"{original_file_name} - {sanitized_value}.xlsx" |
|
split_files[file_name] = output |
|
|
|
logs.append(f"File '{file_name}' created with {len(split_data)} rows (plus metadata and headers).") |
|
|
|
return split_files, logs |
|
|
|
st.title("BG File Splitter") |
|
st.write("Upload an Excel file to split it based on unique values in specified columns.") |
|
|
|
uploaded_file = st.file_uploader("Upload Excel File", type=["xlsx"]) |
|
|
|
if uploaded_file: |
|
st.write("Processing the uploaded file...") |
|
original_file_name = uploaded_file.name.rsplit(".", 1)[0] |
|
columns_to_split = ['Afsenderkunde', 'Modtagerkunde'] |
|
|
|
split_files, logs = process_excel(uploaded_file, columns_to_split, original_file_name) |
|
|
|
st.write("### Logs (Debugging Info)") |
|
for log in logs: |
|
st.text(log) |
|
|
|
if split_files: |
|
st.write("### Download Split Files") |
|
for file_name, file_content in split_files.items(): |
|
st.download_button( |
|
label=f"Download {file_name}", |
|
data=file_content, |
|
file_name=file_name, |
|
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" |
|
) |
|
else: |
|
st.write("No files were created. Check the logs for details.") |
|
|