"""Streamlit app that compares the headers and column values of two files."""

from io import StringIO

import pandas as pd
import streamlit as st


def load_file(file, header_row):
    """Read an uploaded CSV or Excel file into a DataFrame.

    Args:
        file: Uploaded file object exposing ``name`` and ``getvalue()``,
            or ``None`` when nothing has been uploaded.
        header_row: 1-based number of the row that holds the column headers.

    Returns:
        A ``(headers, df)`` tuple where ``headers`` is the list of column
        labels and ``df`` the parsed DataFrame, or ``(None, None)`` when
        ``file`` is ``None`` or parsing fails (the error is shown in the UI).
    """
    if file is None:
        return None, None

    # pandas expects a 0-based integer row index for ``header``.
    header_index = int(header_row) - 1
    try:
        # Case-insensitive check so that e.g. "DATA.XLSX" is not parsed as CSV.
        if file.name.lower().endswith(".xlsx"):
            df = pd.read_excel(file, header=header_index)
        else:
            # "utf-8-sig" decodes plain UTF-8 too, and drops a leading BOM
            # that would otherwise be glued onto the first header name.
            text = file.getvalue().decode("utf-8-sig")
            df = pd.read_csv(StringIO(text), header=header_index)
    except Exception as e:
        # UI boundary: report any parsing problem to the user, do not crash.
        st.error(f"Error loading file: {e}")
        return None, None

    return df.columns.tolist(), df


def _upload_section(ordinal, index):
    """Render the upload widgets for one file and store the loaded result.

    Args:
        ordinal: Lower-case word used in the labels ("first" or "second").
        index: Numeric suffix of the widget and session-state keys (1 or 2).
    """
    title_word = ordinal.capitalize()
    st.header(f"Upload {title_word} File")

    file_key = f"file{index}"
    headers_key = f"headers{index}"
    df_key = f"df{index}"
    # Initialise each key on its own so a partially populated session state
    # can never cause a KeyError later on.
    for key in (file_key, headers_key, df_key):
        if key not in st.session_state:
            st.session_state[key] = None

    uploaded = st.file_uploader(
        f"Choose the {ordinal} file (CSV or Excel)",
        type=["csv", "xlsx"],
        key=f"file{index}_uploader",
    )
    if uploaded is None:
        return

    header_row = st.number_input(
        f"Specify the row number for headers in the {ordinal} file",
        min_value=1,
        value=1,
        key=f"header_row{index}",
    )
    if not st.button(f"Load {title_word} File", key=f"load_file{index}"):
        return

    headers, df = load_file(uploaded, header_row)
    if headers:
        # Persist the result: Streamlit reruns the script on every interaction.
        st.session_state[file_key] = uploaded
        st.session_state[headers_key] = headers
        st.session_state[df_key] = df
        st.success(f"Headers from the {ordinal} file: {headers}")
    else:
        st.error(f"Failed to load the {ordinal} file.")


def _split_headers(headers1, headers2):
    """Return (only_in_first, only_in_second, common), keeping input order."""
    in_first = set(headers1)
    in_second = set(headers2)
    only_in_first = [h for h in headers1 if h not in in_second]
    only_in_second = [h for h in headers2 if h not in in_first]
    common = [h for h in headers1 if h in in_second]
    return only_in_first, only_in_second, common


def _value_differences(series1, series2):
    """Return the distinct non-null values exclusive to each of two columns."""
    values1 = set(series1.dropna().unique())
    values2 = set(series2.dropna().unique())
    return values1 - values2, values2 - values1


def _render_header_comparison(missing_in_file2, missing_in_file1):
    """Show which headers are present in only one of the two files."""
    st.header("Header Comparison Results")
    if not (missing_in_file2 or missing_in_file1):
        st.success("Headers match perfectly!")
        return

    st.write("Differences in headers:")
    if missing_in_file2:
        st.write(f"Headers in File 1 but not in File 2: {missing_in_file2}")
    if missing_in_file1:
        st.write(f"Headers in File 2 but not in File 1: {missing_in_file1}")


def _render_column_comparison(common_headers, df1, df2):
    """Show, per shared column, the values that occur in only one file."""
    st.header("Column Value Differences")
    for header in common_headers:
        # Compare as sets: the reported differences are set-based, so row
        # order or an int/float dtype mismatch must not count as a difference.
        only_in_1, only_in_2 = _value_differences(df1[header], df2[header])
        if only_in_1 or only_in_2:
            st.write(f"Column '{header}' differs between the files.")
            st.write(f"Values in File 1 but not in File 2: {only_in_1}")
            st.write(f"Values in File 2 but not in File 1: {only_in_2}")
        else:
            st.write(f"Column '{header}' matches in both files.")


def main():
    """Run the app: upload two files, then compare headers and column values."""
    st.title("File Header and Data Comparison Tool")

    _upload_section("first", 1)
    _upload_section("second", 2)

    headers1 = st.session_state["headers1"]
    headers2 = st.session_state["headers2"]
    # Nothing to compare until both files have been loaded successfully.
    if not (headers1 and headers2):
        return

    missing_in_file2, missing_in_file1, common_headers = _split_headers(
        headers1, headers2
    )
    _render_header_comparison(missing_in_file2, missing_in_file1)

    if common_headers:
        _render_column_comparison(
            common_headers,
            st.session_state["df1"],
            st.session_state["df2"],
        )


if __name__ == "__main__":
    main()