Spaces:

DavMelchi
/

db_query

Running

App Files Community

DavMelchi committed on Jul 11

Commit

c83b398

1 Parent(s): 7c8002a

New code for dump compare

Browse files

Files changed (1) hide show

apps/dump_compare.py +93 -177

apps/dump_compare.py CHANGED Viewed

@@ -1,13 +1,11 @@
 import os
-import shutil
-import tempfile
 import pandas as pd
 import streamlit as st
-# import xlwings as xw
-# === Core Logic ===
 def find_header_row(df, keyword="Dist_Name"):
@@ -18,37 +16,13 @@ def find_header_row(df, keyword="Dist_Name"):
     raise ValueError(f"No row with '{keyword}' found.")
-# import xlwings as xw
-# def read_sheet_fallback(file, sheet):
-#     try:
-#         app = xw.App(visible=False)
-#         book = app.books.open(file)
-#         sht = book.sheets[sheet]
-#         df = sht.used_range.options(pd.DataFrame, header=False, index=False).value
-#         book.close()
-#         app.quit()
-#         return df
-#     except Exception as e2:
-#         raise RuntimeError(f"xlwings failed: {e2}")
-def read_sheet_fallback(file, sheet):
-    try:
-        # pandas can directly read Excel files
-        # sheet_name can be the sheet name (string) or sheet number (0-indexed)
-        df = pd.read_excel(file, sheet_name=sheet, header=None, engine="calamine")
-        return df
-    except FileNotFoundError:
-        raise FileNotFoundError(f"The file '{file}' was not found.")
-    except ValueError as e:
-        # This could happen if the sheet doesn't exist, or other pandas-related errors
-        raise ValueError(f"Error reading sheet '{sheet}' from '{file}': {e}")
-    except Exception as e:
-        raise RuntimeError(f"An unexpected error occurred: {e}")
-def load_clean_df(file, sheet):
-    df_raw = read_sheet_fallback(file, sheet)
     header_row = find_header_row(df_raw)
     df_raw.columns = df_raw.iloc[header_row]
     df = df_raw.drop(index=list(range(header_row + 1)))
@@ -64,152 +38,94 @@ def detect_dist_col(columns):
     raise ValueError("Dist_Name column not found.")
-def compare_dumps(
-    old_file,
-    new_file,
-    mo_list,
-    output_dir,
-    #   progress_callback=None
-):
-    os.makedirs(output_dir, exist_ok=True)
-    # Friendly column labels based on file names
-    old_label = os.path.basename(old_file)
-    new_label = os.path.basename(new_file)
-    total_changes = 0
-    logs = []
-    for i, sheet_name in enumerate(mo_list):
-        try:
-            df_old = load_clean_df(old_file, sheet_name)
-            df_new = load_clean_df(new_file, sheet_name)
-            dist_col_old = detect_dist_col(df_old.columns)
-            dist_col_new = detect_dist_col(df_new.columns)
-            df_old = df_old[df_old[dist_col_old].notna()].set_index(dist_col_old)
-            df_new = df_new[df_new[dist_col_new].notna()].set_index(dist_col_new)
-            common = df_old.index.intersection(df_new.index)
-            df_old_common = df_old.loc[common]
-            df_new_common = df_new.loc[common]
-            mask = (df_old_common != df_new_common) & ~(
-                df_old_common.isna() & df_new_common.isna()
-            )
-            changes = []
-            for dist in mask.index:
-                for param in mask.columns[mask.loc[dist]]:
-                    if param.strip().lower() == "file_name":
-                        continue
-                    changes.append(
-                        {
-                            "Dist_Name": dist,
-                            "Parameter": param,
-                            old_label: df_old_common.loc[dist, param],
-                            new_label: df_new_common.loc[dist, param],
-                        }
-                    )
-            df_changes = pd.DataFrame(changes)
-            if not df_changes.empty:
-                output_path = os.path.join(output_dir, f"{sheet_name}_differences.xlsx")
-                df_changes.to_excel(output_path, index=False)
-                logs.append(f"{len(df_changes)} changes in {sheet_name}")
-                total_changes += len(df_changes)
-            else:
-                logs.append(f"No changes in {sheet_name}")
-        except Exception as e:
-            logs.append(f"Error in {sheet_name}: {e}")
-    # if progress_callback:
-    #     progress_callback((i + 1) / len(mo_list))
-    return total_changes, logs
-# === Streamlit UI ===
-st.title("📊 Dump Compare Tool")
 old_file = st.file_uploader("Upload Old Dump (.xlsb)", type=["xlsb"], key="old")
 new_file = st.file_uploader("Upload New Dump (.xlsb)", type=["xlsb"], key="new")
-# Determine common sheet names available in BOTH uploaded dumps and let the user pick
-common_sheets: list[str] = []
-selected_sheets: list[str] = []
-if old_file and new_file:
-    import tempfile as _tmp
-    from pyxlsb import open_workbook as _open_wb
-    def _get_sheet_names(uploaded_file) -> list[str]:
-        """Return sheet names from an `st.uploaded_file` object."""
-        with _tmp.NamedTemporaryFile(delete=False, suffix=".xlsb") as tmp:
-            tmp.write(uploaded_file.getvalue())
-            tmp_path = tmp.name
-        try:
-            with _open_wb(tmp_path) as wb:
-                # `wb.sheets` in pyxlsb already returns a list of sheet names (str)
-                return list(wb.sheets)
-        finally:
-            os.remove(tmp_path)
-    common_sheets = sorted(
-        set(_get_sheet_names(old_file)).intersection(_get_sheet_names(new_file))
-    )
-    if common_sheets:
-        selected_sheets = st.multiselect(
-            "MO Sheet Names (choose one or more)",
-            common_sheets,
-            default=common_sheets[:1],  # select only the first sheet by default
-        )
-    else:
-        st.warning("No common sheet names found between the two files.")
-output_dir = "comparison_output"  # fixed output folder name
 if st.button("Run Comparison", type="primary", use_container_width=True):
-    if not all([old_file, new_file]) or not selected_sheets:
-        st.warning("Please upload both files and select at least one common sheet.")
     else:
-        mo_list = selected_sheets
-        # Reset file pointers because they may have been consumed while reading sheet names
-        old_file.seek(0)
-        new_file.seek(0)
-        with st.spinner("Comparing dumps..."):
-            with tempfile.TemporaryDirectory() as tmpdir:
-                output_path = os.path.join(tmpdir, output_dir)
-                old_path = os.path.join(tmpdir, "old.xlsb")
-                new_path = os.path.join(tmpdir, "new.xlsb")
-                with open(old_path, "wb") as f:
-                    f.write(old_file.read())
-                with open(new_path, "wb") as f:
-                    f.write(new_file.read())
-                # progress_bar = st.progress(0.0)
-                # def update_progress(pct):
-                #     progress_bar.progress(pct)
-                total, logs = compare_dumps(old_path, new_path, mo_list, output_path)
-                st.success(f"✅ Comparison completed. Total changes: {total}")
-                # Zip and offer download
-                shutil.make_archive(output_path, "zip", output_path)
-                with open(f"{output_path}.zip", "rb") as f:
-                    st.download_button(
-                        "Download Results (.zip)",
-                        f,
-                        file_name="differences.zip",
-                        mime="application/zip",
-                        type="primary",
-                        on_click="ignore",
-                    )

 import os
+import zipfile
+from io import BytesIO
 import pandas as pd
 import streamlit as st
+# === Fonctions ===
 def find_header_row(df, keyword="Dist_Name"):
     raise ValueError(f"No row with '{keyword}' found.")
+def read_sheet_fallback(file_bytes, sheet):
+    file_bytes.seek(0)
+    return pd.read_excel(file_bytes, sheet_name=sheet, header=None, engine="calamine")
+def load_clean_df(file_bytes, sheet):
+    df_raw = read_sheet_fallback(file_bytes, sheet)
     header_row = find_header_row(df_raw)
     df_raw.columns = df_raw.iloc[header_row]
     df = df_raw.drop(index=list(range(header_row + 1)))
     raise ValueError("Dist_Name column not found.")
+# === Interface Streamlit ===
+st.title("📊 Dump Compare Tool (In-Memory with Calamine)")
 old_file = st.file_uploader("Upload Old Dump (.xlsb)", type=["xlsb"], key="old")
 new_file = st.file_uploader("Upload New Dump (.xlsb)", type=["xlsb"], key="new")
+sheet_list_input = st.text_input(
+    "Enter sheet names (comma-separated)", placeholder="e.g. BCF, BTS, CELL"
+)
 if st.button("Run Comparison", type="primary", use_container_width=True):
+    if not all([old_file, new_file, sheet_list_input.strip()]):
+        st.warning("Please upload both files and provide at least one sheet name.")
     else:
+        sheet_names = [s.strip() for s in sheet_list_input.split(",") if s.strip()]
+        old_bytes = BytesIO(old_file.read())
+        new_bytes = BytesIO(new_file.read())
+        logs = []
+        total = 0
+        all_results = {}
+        for sheet in sheet_names:
+            try:
+                df_old = load_clean_df(old_bytes, sheet)
+                old_bytes.seek(0)
+                df_new = load_clean_df(new_bytes, sheet)
+                new_bytes.seek(0)
+                dist_col_old = detect_dist_col(df_old.columns)
+                dist_col_new = detect_dist_col(df_new.columns)
+                df_old = df_old[df_old[dist_col_old].notna()].set_index(dist_col_old)
+                df_new = df_new[df_new[dist_col_new].notna()].set_index(dist_col_new)
+                common = df_old.index.intersection(df_new.index)
+                df_old_common = df_old.loc[common]
+                df_new_common = df_new.loc[common]
+                mask = (df_old_common != df_new_common) & ~(
+                    df_old_common.isna() & df_new_common.isna()
+                )
+                changes = []
+                for dist in mask.index:
+                    for param in mask.columns[mask.loc[dist]]:
+                        if param.strip().lower() == "file_name":
+                            continue
+                        changes.append(
+                            {
+                                "Dist_Name": dist,
+                                "Parameter": param,
+                                os.path.basename(old_file.name): df_old_common.loc[
+                                    dist, param
+                                ],
+                                os.path.basename(new_file.name): df_new_common.loc[
+                                    dist, param
+                                ],
+                            }
+                        )
+                df_changes = pd.DataFrame(changes)
+                if not df_changes.empty:
+                    all_results[sheet] = df_changes
+                    logs.append(f"{len(df_changes)} changes in '{sheet}'")
+                    total += len(df_changes)
+                else:
+                    logs.append(f"No changes in '{sheet}'")
+            except Exception as e:
+                logs.append(f"❌ Error in '{sheet}': {e}")
+        st.success(f"✅ Comparison completed. Total changes: {total}")
+        for log in logs:
+            st.write(log)
+        if all_results:
+            output_buffer = BytesIO()
+            with zipfile.ZipFile(output_buffer, mode="w") as zf:
+                for sheet, df in all_results.items():
+                    file_buffer = BytesIO()
+                    df.to_excel(file_buffer, index=False)
+                    zf.writestr(f"{sheet}_differences.xlsx", file_buffer.getvalue())
+            st.download_button(
+                "Download Results (.zip)",
+                data=output_buffer.getvalue(),
+                file_name="differences.zip",
+                mime="application/zip",
+            )