Update loading_file.py
loading_file.py  CHANGED  (+26 -42)
@@ -1,45 +1,29 @@
-import requests
-from bs4 import BeautifulSoup
-from io import BytesIO
 import streamlit as st

[old lines 6-31 not captured in this view]

-                break
-
-        if file_url:
-            if not file_url.startswith('http'):
-                file_url = requests.compat.urljoin(url, file_url)
-
-            file_response = requests.get(file_url, headers=headers)
-            if file_response.status_code == 200:
-                return BytesIO(file_response.content), file_name
-            else:
-                st.error(f"Failed to download the file. Status code: {file_response.status_code}")
-    else:
-        st.error(f"Failed to retrieve the webpage. Status code: {response.status_code}")
-    return None, None
 import streamlit as st
+import pandas as pd

+# Store cleaned dataset globally for access in other steps
+cleaned_data = None

+def load_and_clean_data(ods_file, file_name):
+    global cleaned_data  # To make it accessible in other files
+    # Load the dataset and clean it as done before
+    df = pd.read_excel(ods_file, engine='odf')
+    df.drop(columns=["Unnamed: 0", "Unnamed: 1"], inplace=True, errors='ignore')
+    df.dropna(how='all', inplace=True)
+    df.reset_index(drop=True, inplace=True)
+
+    # Clean column names
+    for idx, row in df.iterrows():
+        if row['Unnamed: 2'] == 'Application Number' and row['Unnamed: 3'] == 'Decision':
+            df.columns = ['Application Number', 'Decision']
+            df = df.iloc[idx + 1:]
+            break
+    df.reset_index(drop=True, inplace=True)
+    df['Application Number'] = df['Application Number'].astype(str)
+
+    # Save the cleaned data globally
+    cleaned_data = df
+
+    # Display success
+    st.success(f"Data successfully loaded and cleaned: {file_name}")
+    return df
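For context, a minimal sketch of how the new load_and_clean_data helper could be driven from a Streamlit page. The file-uploader entry point, the import path, and the preview call are illustrative assumptions rather than part of this commit; the code above only requires an .ods file-like object plus a display name, and the odfpy package must be installed for pandas to use engine='odf'.

# Illustrative only: a hypothetical Streamlit page that feeds an uploaded .ods
# file into load_and_clean_data from this commit's loading_file.py.
import streamlit as st

from loading_file import load_and_clean_data

uploaded = st.file_uploader("Upload an .ods file", type=["ods"])  # assumed entry point, not in the commit
if uploaded is not None:
    # UploadedFile is file-like, so it can be handed straight to pd.read_excel inside the helper
    df = load_and_clean_data(uploaded, uploaded.name)
    st.dataframe(df.head())  # quick preview of the cleaned Application Number / Decision table

Because the helper also assigns the result to the module-level cleaned_data variable, other modules can read the same frame via loading_file.cleaned_data after this call, which is the stated intent of the global statement in the commit.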