Esben922 committed on
Commit
64343ed
·
verified ·
1 Parent(s): c3f699a

Update HeaderChecker.py

Browse files
Files changed (1) hide show
  1. HeaderChecker.py +99 -93
HeaderChecker.py CHANGED
@@ -1,93 +1,99 @@
1
- import streamlit as st
2
- import pandas as pd
3
-
4
- def load_file_and_get_headers(file, header_row):
5
- try:
6
- if file.name.endswith(".xlsx"):
7
- df = pd.read_excel(file, header=header_row - 1)
8
- else:
9
- df = pd.read_csv(file, header=header_row - 1)
10
- headers = df.columns.tolist()
11
- return headers, df
12
- except Exception as e:
13
- st.error(f"Error loading file: {e}")
14
- return [], None
15
-
16
- def main():
17
- st.title("File Header and Data Comparison Tool")
18
-
19
- # Upload first file
20
- st.header("Upload First File")
21
- file1 = st.file_uploader("Choose the first file (CSV or Excel)", type=["csv", "xlsx"])
22
- if file1:
23
- header_row1 = st.number_input("Specify the row number for headers in the first file", min_value=1, value=1)
24
- if st.button("Load First File", key="load_file1"):
25
- headers1, df1 = load_file_and_get_headers(file1, header_row1)
26
- if headers1:
27
- st.session_state["headers1"] = headers1
28
- st.session_state["df1"] = df1
29
- st.success(f"Headers from the first file: {headers1}")
30
- else:
31
- st.error("Failed to load headers from the first file.")
32
-
33
- # Upload second file
34
- st.header("Upload Second File")
35
- file2 = st.file_uploader("Choose the second file (CSV or Excel)", type=["csv", "xlsx"], key="file2")
36
- if file2:
37
- header_row2 = st.number_input("Specify the row number for headers in the second file", min_value=1, value=1, key="header2")
38
- if st.button("Load Second File", key="load_file2"):
39
- headers2, df2 = load_file_and_get_headers(file2, header_row2)
40
- if headers2:
41
- st.session_state["headers2"] = headers2
42
- st.session_state["df2"] = df2
43
- st.success(f"Headers from the second file: {headers2}")
44
- else:
45
- st.error("Failed to load headers from the second file.")
46
-
47
- # Compare headers
48
- if "headers1" in st.session_state and "headers2" in st.session_state:
49
- headers1 = st.session_state["headers1"]
50
- headers2 = st.session_state["headers2"]
51
-
52
- if headers1 and headers2: # Ensure both headers are not None
53
- st.header("Header Comparison Results")
54
-
55
- missing_in_file2 = [header for header in headers1 if header not in headers2]
56
- missing_in_file1 = [header for header in headers2 if header not in headers1]
57
-
58
- if missing_in_file2 or missing_in_file1:
59
- st.write("Differences in headers:")
60
- if missing_in_file2:
61
- st.write(f"Headers in File 1 but not in File 2: {missing_in_file2}")
62
- if missing_in_file1:
63
- st.write(f"Headers in File 2 but not in File 1: {missing_in_file1}")
64
- else:
65
- st.success("Headers match perfectly!")
66
-
67
- # Compare column values where headers match
68
- common_headers = [header for header in headers1 if header in headers2]
69
- if common_headers:
70
- st.header("Column Value Differences")
71
- df1 = st.session_state["df1"]
72
- df2 = st.session_state["df2"]
73
-
74
- for header in common_headers:
75
- col1 = df1[header].dropna().unique()
76
- col2 = df2[header].dropna().unique()
77
-
78
- if not pd.Series(col1).equals(pd.Series(col2)):
79
- st.write(f"Column '{header}' differs between the files.")
80
- st.write(f"Values in File 1 but not in File 2: {set(col1) - set(col2)}")
81
- st.write(f"Values in File 2 but not in File 1: {set(col2) - set(col1)}")
82
- else:
83
- st.write(f"Column '{header}' matches in both files.")
84
- else:
85
- st.warning("Both files must be loaded before comparison.")
86
-
87
-
88
- if __name__ == "__main__":
89
- if "headers1" not in st.session_state:
90
- st.session_state["headers1"] = None
91
- if "headers2" not in st.session_state:
92
- st.session_state["headers2"] = None
93
- main()
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from io import StringIO
4
+
5
def load_file(file, header_row):
    """Read an uploaded CSV or Excel file and return its headers and data.

    Args:
        file: Uploaded file object. Its ``name`` selects the parser; for CSV
            input ``getvalue()`` must return the raw bytes. ``None`` is
            accepted and yields ``(None, None)``.
        header_row: 1-based number of the row that holds the column headers.

    Returns:
        ``(headers, df)`` where ``headers`` is the list of column labels and
        ``df`` the parsed DataFrame, or ``(None, None)`` when no file was
        given or parsing failed. Failures are reported through ``st.error``.
    """
    if file is None:
        return None, None

    # pandas counts header rows from 0, the UI counts from 1.
    header_index = header_row - 1
    try:
        # Compare case-insensitively so "REPORT.XLSX" is not parsed as CSV.
        if file.name.lower().endswith(".xlsx"):
            df = pd.read_excel(file, header=header_index)
        else:
            text = file.getvalue().decode("utf-8")
            df = pd.read_csv(StringIO(text), header=header_index)
    except Exception as e:
        # UI boundary: any parser/decoding failure is shown to the user
        # instead of crashing the app.
        st.error(f"Error loading file: {e}")
        return None, None

    return df.columns.tolist(), df
18
+
19
def _upload_section(ordinal, index):
    """Render the upload widgets for one file and cache a successful load.

    Args:
        ordinal: Lower-case word used in the labels ("first" or "second").
        index: Number used in the widget and session-state keys (1 or 2).

    On a successful load the uploaded file, its headers and its DataFrame
    are stored in ``st.session_state`` under ``file<index>``,
    ``headers<index>`` and ``df<index>``.
    """
    file_key = f"file{index}"
    headers_key = f"headers{index}"
    df_key = f"df{index}"

    st.header(f"Upload {ordinal.capitalize()} File")
    # Seed the state once so later reads never hit a missing key.
    if file_key not in st.session_state:
        st.session_state[file_key] = None
        st.session_state[headers_key] = None
        st.session_state[df_key] = None

    uploaded = st.file_uploader(
        f"Choose the {ordinal} file (CSV or Excel)",
        type=["csv", "xlsx"],
        key=f"file{index}_uploader",
    )
    if not uploaded:
        return

    header_row = st.number_input(
        f"Specify the row number for headers in the {ordinal} file",
        min_value=1,
        value=1,
        key=f"header_row{index}",
    )
    if not st.button(f"Load {ordinal.capitalize()} File", key=f"load_file{index}"):
        return

    headers, df = load_file(uploaded, header_row)
    if headers:
        st.session_state[file_key] = uploaded
        st.session_state[headers_key] = headers
        st.session_state[df_key] = df
        st.success(f"Headers from the {ordinal} file: {headers}")
    else:
        st.error(f"Failed to load the {ordinal} file.")


def _report_column_differences(df1, df2, common_headers):
    """Write, per shared column, whether its distinct non-null values match."""
    for header in common_headers:
        values1 = set(df1[header].dropna().unique())
        values2 = set(df2[header].dropna().unique())
        only_in_1 = values1 - values2
        only_in_2 = values2 - values1

        # Decide from the set differences themselves: comparing the unique
        # arrays positionally would flag columns that hold the same values
        # in a different order (or dtype) as differing, with nothing listed.
        if only_in_1 or only_in_2:
            st.write(f"Column '{header}' differs between the files.")
            st.write(f"Values in File 1 but not in File 2: {only_in_1}")
            st.write(f"Values in File 2 but not in File 1: {only_in_2}")
        else:
            st.write(f"Column '{header}' matches in both files.")


def main():
    """Run the Streamlit page: upload two files, then compare them.

    Once headers for both files are present in the session state, the page
    lists headers found in only one file and, for every shared header,
    reports whether the distinct non-null column values are the same.
    """
    st.title("File Header and Data Comparison Tool")

    _upload_section("first", 1)
    _upload_section("second", 2)

    headers1 = st.session_state["headers1"]
    headers2 = st.session_state["headers2"]
    # Nothing to compare until both files have been loaded successfully.
    if not (headers1 and headers2):
        return

    st.header("Header Comparison Results")
    in_file1 = set(headers1)
    in_file2 = set(headers2)
    missing_in_file2 = [header for header in headers1 if header not in in_file2]
    missing_in_file1 = [header for header in headers2 if header not in in_file1]

    if missing_in_file2 or missing_in_file1:
        st.write("Differences in headers:")
        if missing_in_file2:
            st.write(f"Headers in File 1 but not in File 2: {missing_in_file2}")
        if missing_in_file1:
            st.write(f"Headers in File 2 but not in File 1: {missing_in_file1}")
    else:
        st.success("Headers match perfectly!")

    # Compare column values where headers match
    common_headers = [header for header in headers1 if header in in_file2]
    if common_headers:
        st.header("Column Value Differences")
        _report_column_differences(
            st.session_state["df1"], st.session_state["df2"], common_headers
        )
97
+
98
# Script entry point: build the page only when this file is run directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()