Spaces:
Sleeping
Sleeping
Upload clean.py
Browse files
clean.py
CHANGED
@@ -155,11 +155,11 @@ def clean_column(df, column_name):
|
|
155 |
|
156 |
# Convert column to determined data type
|
157 |
if data_type == "float":
|
158 |
-
df
|
159 |
elif data_type == "integer":
|
160 |
-
df
|
161 |
elif data_type == "date":
|
162 |
-
df[column_name] = pd.to_datetime(df[column_name], errors='coerce')
|
163 |
elif data_type == "string" or data_type == "object":
|
164 |
# Transform string values
|
165 |
transform_result = transform_string_column(column_data, column_name)
|
@@ -178,8 +178,10 @@ def clean_column(df, column_name):
|
|
178 |
print(f" Potential typos found: {typo_result['typos']}")
|
179 |
|
180 |
# Set empty and invalid cells to NaN
|
181 |
-
|
182 |
-
|
|
|
|
|
183 |
|
184 |
return df, nonconforming_cells
|
185 |
|
|
|
155 |
|
156 |
# Convert column to determined data type
|
157 |
if data_type == "float":
|
158 |
+
df[column_name] = pd.to_numeric(df[column_name], errors='coerce')
|
159 |
elif data_type == "integer":
|
160 |
+
df[column_name] = pd.to_numeric(df[column_name], errors='coerce').astype('Int64')
|
161 |
elif data_type == "date":
|
162 |
+
df[column_name] = pd.to_datetime(df[column_name], errors='coerce', dayfirst=True)
|
163 |
elif data_type == "string" or data_type == "object":
|
164 |
# Transform string values
|
165 |
transform_result = transform_string_column(column_data, column_name)
|
|
|
178 |
print(f" Potential typos found: {typo_result['typos']}")
|
179 |
|
180 |
# Set empty and invalid cells to NaN
|
181 |
+
indices_to_set_nan = set(empty_indices + invalid_indices)
|
182 |
+
existing_indices = df.index.intersection(indices_to_set_nan)
|
183 |
+
df.loc[existing_indices, column_name] = np.nan
|
184 |
+
nonconforming_cells = len(existing_indices)
|
185 |
|
186 |
return df, nonconforming_cells
|
187 |
|