Update app.py
Browse files
app.py
CHANGED
@@ -4,12 +4,17 @@ import os
|
|
4 |
import zipfile
|
5 |
|
6 |
def process_csv(uploaded_file):
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
8 |
# Load the data by passing the uploaded file's .name attribute to pandas
|
9 |
data = pd.read_csv(uploaded_file.name)
|
10 |
|
11 |
-
#
|
12 |
-
|
13 |
|
14 |
# List to store the details of columns where data was added
|
15 |
data_added_details = []
|
@@ -20,7 +25,8 @@ def process_csv(uploaded_file):
|
|
20 |
if data[col].dtype == 'object' or (data[col].nunique() < 6 and pd.api.types.is_numeric_dtype(data[col])):
|
21 |
# Create a mapping of original values to codes, including NaN or blank values mapped to -9999
|
22 |
mapping = {value: code if pd.notna(value) else -9999 for code, value in enumerate(data[col].unique())}
|
23 |
-
|
|
|
24 |
# Replace the values in the column with their respective codes
|
25 |
data[col] = data[col].map(mapping)
|
26 |
elif pd.api.types.is_numeric_dtype(data[col]) and any(pd.isna(data[col])):
|
@@ -29,7 +35,7 @@ def process_csv(uploaded_file):
|
|
29 |
data[col].fillna(median_value, inplace=True)
|
30 |
data_added_details.append([col, "Median", median_value])
|
31 |
|
32 |
-
# Name of the zip file
|
33 |
zip_name = "processed_files.zip"
|
34 |
|
35 |
# Save CSV files and add them to the zip file
|
@@ -37,9 +43,9 @@ def process_csv(uploaded_file):
|
|
37 |
data.to_csv("modified_data.csv", index=False)
|
38 |
zipf.write("modified_data.csv")
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
zipf.write("
|
43 |
|
44 |
data_added_df = pd.DataFrame(data_added_details, columns=['Column', 'Method', 'Value Added'])
|
45 |
data_added_df.to_csv("data_added_details.csv", index=False)
|
|
|
4 |
import zipfile
|
5 |
|
6 |
def process_csv(uploaded_file):
|
7 |
+
"""
|
8 |
+
Process the uploaded CSV file to:
|
9 |
+
1. Replace text-based columns, and numerical columns with fewer than six unique values, with coded values.
|
10 |
+
2. Fill missing values in numerical columns with their respective medians.
|
11 |
+
3. Return a zip file containing the modified CSV file, a legend CSV, and a CSV detailing data fill methods.
|
12 |
+
"""
|
13 |
# Load the data by passing the uploaded file's .name attribute to pandas
|
14 |
data = pd.read_csv(uploaded_file.name)
|
15 |
|
16 |
+
# List of [column, original value, mapped value] rows, later written out as mapping.csv
|
17 |
+
mapping_list = []
|
18 |
|
19 |
# List to store the details of columns where data was added
|
20 |
data_added_details = []
|
|
|
25 |
if data[col].dtype == 'object' or (data[col].nunique() < 6 and pd.api.types.is_numeric_dtype(data[col])):
|
26 |
# Create a mapping of original values to codes, including NaN or blank values mapped to -9999
|
27 |
mapping = {value: code if pd.notna(value) else -9999 for code, value in enumerate(data[col].unique())}
|
28 |
+
for original_value, mapped_value in mapping.items():
|
29 |
+
mapping_list.append([col, original_value, mapped_value])
|
30 |
# Replace the values in the column with their respective codes
|
31 |
data[col] = data[col].map(mapping)
|
32 |
elif pd.api.types.is_numeric_dtype(data[col]) and any(pd.isna(data[col])):
|
|
|
35 |
data[col].fillna(median_value, inplace=True)
|
36 |
data_added_details.append([col, "Median", median_value])
|
37 |
|
38 |
+
# Name of the zip file
|
39 |
zip_name = "processed_files.zip"
|
40 |
|
41 |
# Save CSV files and add them to the zip file
|
|
|
43 |
data.to_csv("modified_data.csv", index=False)
|
44 |
zipf.write("modified_data.csv")
|
45 |
|
46 |
+
mapping_df = pd.DataFrame(mapping_list, columns=['Column', 'Original Value', 'Mapped Value'])
|
47 |
+
mapping_df.to_csv("mapping.csv", index=False)
|
48 |
+
zipf.write("mapping.csv")
|
49 |
|
50 |
data_added_df = pd.DataFrame(data_added_details, columns=['Column', 'Method', 'Value Added'])
|
51 |
data_added_df.to_csv("data_added_details.csv", index=False)
|