Spaces:

DocSrvNyk
/

sccofd_1

Sleeping

App Files Community

DocSrvNyk committed on Sep 16, 2023

Commit

79c5ac1

•

1 Parent(s): 0962801

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -8

app.py CHANGED Viewed

@@ -4,12 +4,17 @@ import os
 import zipfile
 def process_csv(uploaded_file):
     # Load the data from the uploaded file's byte stream
     data = pd.read_csv(uploaded_file.name)
-    # Dictionary to store column name and its mapping of original values to codes
-    legend_dict = {}
     # List to store the details of columns where data was added
     data_added_details = []
@@ -20,7 +25,8 @@ def process_csv(uploaded_file):
         if data[col].dtype == 'object' or (data[col].nunique() < 6 and pd.api.types.is_numeric_dtype(data[col])):
             # Create a mapping of original values to codes, including NaN or blank values mapped to -9999
             mapping = {value: code if pd.notna(value) else -9999 for code, value in enumerate(data[col].unique())}
-            legend_dict[col] = mapping
             # Replace the values in the column with their respective codes
             data[col] = data[col].map(mapping)
         elif pd.api.types.is_numeric_dtype(data[col]) and any(pd.isna(data[col])):
@@ -29,7 +35,7 @@ def process_csv(uploaded_file):
             data[col].fillna(median_value, inplace=True)
             data_added_details.append([col, "Median", median_value])
-    # Name of the zip file based on uploaded file name
     zip_name = "processed_files.zip"
     # Save CSV files and add them to the zip file
@@ -37,9 +43,9 @@ def process_csv(uploaded_file):
         data.to_csv("modified_data.csv", index=False)
         zipf.write("modified_data.csv")
-        legend_df = pd.DataFrame(list(legend_dict.items()), columns=['Column', 'Mapping'])
-        legend_df.to_csv("legend.csv", index=False)
-        zipf.write("legend.csv")
         data_added_df = pd.DataFrame(data_added_details, columns=['Column', 'Method', 'Value Added'])
         data_added_df.to_csv("data_added_details.csv", index=False)

 import zipfile
 def process_csv(uploaded_file):
+    """
+    Process the uploaded CSV file to:
+    1. Replace text-based columns and numerical columns with less than six unique options with coded values.
+    2. Fill missing values in numerical columns with their respective medians.
+    3. Return a zip file containing the modified CSV file, a legend CSV, and a CSV detailing data fill methods.
+    """
     # Load the data from the uploaded file's byte stream
     data = pd.read_csv(uploaded_file.name)
+    # List to store mappings of columns
+    mapping_list = []
     # List to store the details of columns where data was added
     data_added_details = []
         if data[col].dtype == 'object' or (data[col].nunique() < 6 and pd.api.types.is_numeric_dtype(data[col])):
             # Create a mapping of original values to codes, including NaN or blank values mapped to -9999
             mapping = {value: code if pd.notna(value) else -9999 for code, value in enumerate(data[col].unique())}
+            for original_value, mapped_value in mapping.items():
+                mapping_list.append([col, original_value, mapped_value])
             # Replace the values in the column with their respective codes
             data[col] = data[col].map(mapping)
         elif pd.api.types.is_numeric_dtype(data[col]) and any(pd.isna(data[col])):
             data[col].fillna(median_value, inplace=True)
             data_added_details.append([col, "Median", median_value])
+    # Name of the zip file
     zip_name = "processed_files.zip"
     # Save CSV files and add them to the zip file
         data.to_csv("modified_data.csv", index=False)
         zipf.write("modified_data.csv")
+        mapping_df = pd.DataFrame(mapping_list, columns=['Column', 'Original Value', 'Mapped Value'])
+        mapping_df.to_csv("mapping.csv", index=False)
+        zipf.write("mapping.csv")
         data_added_df = pd.DataFrame(data_added_details, columns=['Column', 'Method', 'Value Added'])
         data_added_df.to_csv("data_added_details.csv", index=False)