Spaces:

Esben922
/

GB-GB

Running

App Files Community

Esben922 committed on Dec 6, 2024

Commit

64343ed

verified ·

1 Parent(s): c3f699a

Update HeaderChecker.py

Browse files

Files changed (1) hide show

HeaderChecker.py +99 -93

HeaderChecker.py CHANGED Viewed

@@ -1,93 +1,99 @@
-import streamlit as st
-import pandas as pd
-def load_file_and_get_headers(file, header_row):
-    try:
-        if file.name.endswith(".xlsx"):
-            df = pd.read_excel(file, header=header_row - 1)
-        else:
-            df = pd.read_csv(file, header=header_row - 1)
-        headers = df.columns.tolist()
-        return headers, df
-    except Exception as e:
-        st.error(f"Error loading file: {e}")
-        return [], None
-def main():
-    st.title("File Header and Data Comparison Tool")
-    # Upload first file
-    st.header("Upload First File")
-    file1 = st.file_uploader("Choose the first file (CSV or Excel)", type=["csv", "xlsx"])
-    if file1:
-        header_row1 = st.number_input("Specify the row number for headers in the first file", min_value=1, value=1)
-        if st.button("Load First File", key="load_file1"):
-            headers1, df1 = load_file_and_get_headers(file1, header_row1)
-            if headers1:
-                st.session_state["headers1"] = headers1
-                st.session_state["df1"] = df1
-                st.success(f"Headers from the first file: {headers1}")
-            else:
-                st.error("Failed to load headers from the first file.")
-    # Upload second file
-    st.header("Upload Second File")
-    file2 = st.file_uploader("Choose the second file (CSV or Excel)", type=["csv", "xlsx"], key="file2")
-    if file2:
-        header_row2 = st.number_input("Specify the row number for headers in the second file", min_value=1, value=1, key="header2")
-        if st.button("Load Second File", key="load_file2"):
-            headers2, df2 = load_file_and_get_headers(file2, header_row2)
-            if headers2:
-                st.session_state["headers2"] = headers2
-                st.session_state["df2"] = df2
-                st.success(f"Headers from the second file: {headers2}")
-            else:
-                st.error("Failed to load headers from the second file.")
-    # Compare headers
-    if "headers1" in st.session_state and "headers2" in st.session_state:
-        headers1 = st.session_state["headers1"]
-        headers2 = st.session_state["headers2"]
-        if headers1 and headers2:  # Ensure both headers are not None
-            st.header("Header Comparison Results")
-            missing_in_file2 = [header for header in headers1 if header not in headers2]
-            missing_in_file1 = [header for header in headers2 if header not in headers1]
-            if missing_in_file2 or missing_in_file1:
-                st.write("Differences in headers:")
-                if missing_in_file2:
-                    st.write(f"Headers in File 1 but not in File 2: {missing_in_file2}")
-                if missing_in_file1:
-                    st.write(f"Headers in File 2 but not in File 1: {missing_in_file1}")
-            else:
-                st.success("Headers match perfectly!")
-            # Compare column values where headers match
-            common_headers = [header for header in headers1 if header in headers2]
-            if common_headers:
-                st.header("Column Value Differences")
-                df1 = st.session_state["df1"]
-                df2 = st.session_state["df2"]
-                for header in common_headers:
-                    col1 = df1[header].dropna().unique()
-                    col2 = df2[header].dropna().unique()
-                    if not pd.Series(col1).equals(pd.Series(col2)):
-                        st.write(f"Column '{header}' differs between the files.")
-                        st.write(f"Values in File 1 but not in File 2: {set(col1) - set(col2)}")
-                        st.write(f"Values in File 2 but not in File 1: {set(col2) - set(col1)}")
-                    else:
-                        st.write(f"Column '{header}' matches in both files.")
-        else:
-            st.warning("Both files must be loaded before comparison.")
-if __name__ == "__main__":
-    if "headers1" not in st.session_state:
-        st.session_state["headers1"] = None
-    if "headers2" not in st.session_state:
-        st.session_state["headers2"] = None
-    main()

+import streamlit as st
+import pandas as pd
+from io import StringIO
+def load_file(file, header_row):
+    if file is None:
+        return None, None
+    try:
+        if file.name.endswith(".xlsx"):
+            df = pd.read_excel(file, header=header_row - 1)
+        else:
+            df = pd.read_csv(StringIO(file.getvalue().decode("utf-8")), header=header_row - 1)
+        headers = df.columns.tolist()
+        return headers, df
+    except Exception as e:
+        st.error(f"Error loading file: {e}")
+        return None, None
+def main():
+    st.title("File Header and Data Comparison Tool")
+    # First file upload
+    st.header("Upload First File")
+    if "file1" not in st.session_state:
+        st.session_state["file1"] = None
+        st.session_state["headers1"] = None
+        st.session_state["df1"] = None
+    file1 = st.file_uploader("Choose the first file (CSV or Excel)", type=["csv", "xlsx"], key="file1_uploader")
+    if file1:
+        header_row1 = st.number_input("Specify the row number for headers in the first file", min_value=1, value=1, key="header_row1")
+        if st.button("Load First File", key="load_file1"):
+            headers1, df1 = load_file(file1, header_row1)
+            if headers1:
+                st.session_state["file1"] = file1
+                st.session_state["headers1"] = headers1
+                st.session_state["df1"] = df1
+                st.success(f"Headers from the first file: {headers1}")
+            else:
+                st.error("Failed to load the first file.")
+    # Second file upload
+    st.header("Upload Second File")
+    if "file2" not in st.session_state:
+        st.session_state["file2"] = None
+        st.session_state["headers2"] = None
+        st.session_state["df2"] = None
+    file2 = st.file_uploader("Choose the second file (CSV or Excel)", type=["csv", "xlsx"], key="file2_uploader")
+    if file2:
+        header_row2 = st.number_input("Specify the row number for headers in the second file", min_value=1, value=1, key="header_row2")
+        if st.button("Load Second File", key="load_file2"):
+            headers2, df2 = load_file(file2, header_row2)
+            if headers2:
+                st.session_state["file2"] = file2
+                st.session_state["headers2"] = headers2
+                st.session_state["df2"] = df2
+                st.success(f"Headers from the second file: {headers2}")
+            else:
+                st.error("Failed to load the second file.")
+    # Compare headers
+    if st.session_state["headers1"] and st.session_state["headers2"]:
+        headers1 = st.session_state["headers1"]
+        headers2 = st.session_state["headers2"]
+        st.header("Header Comparison Results")
+        missing_in_file2 = [header for header in headers1 if header not in headers2]
+        missing_in_file1 = [header for header in headers2 if header not in headers1]
+        if missing_in_file2 or missing_in_file1:
+            st.write("Differences in headers:")
+            if missing_in_file2:
+                st.write(f"Headers in File 1 but not in File 2: {missing_in_file2}")
+            if missing_in_file1:
+                st.write(f"Headers in File 2 but not in File 1: {missing_in_file1}")
+        else:
+            st.success("Headers match perfectly!")
+        # Compare column values where headers match
+        common_headers = [header for header in headers1 if header in headers2]
+        if common_headers:
+            st.header("Column Value Differences")
+            df1 = st.session_state["df1"]
+            df2 = st.session_state["df2"]
+            for header in common_headers:
+                col1 = df1[header].dropna().unique()
+                col2 = df2[header].dropna().unique()
+                if not pd.Series(col1).equals(pd.Series(col2)):
+                    st.write(f"Column '{header}' differs between the files.")
+                    st.write(f"Values in File 1 but not in File 2: {set(col1) - set(col2)}")
+                    st.write(f"Values in File 2 but not in File 1: {set(col2) - set(col1)}")
+                else:
+                    st.write(f"Column '{header}' matches in both files.")
+if __name__ == "__main__":
+    main()