| | import pandas as pd |
| | import re |
| | from typing import Any |
| |
|
| | |
| | SPECIAL_ONLY_REGEX = re.compile(r'^[^A-Za-z0-9]+$') |
| |
|
| | def is_numeric_or_special(s: Any) -> bool: |
| | """ |
| | Check if the provided value is numeric or consists solely of special characters. |
| | |
| | Parameters: |
| | s (Any): The input value to check. |
| | |
| | Returns: |
| | bool: True if the value is numeric or special-only, False otherwise. |
| | """ |
| | if pd.isnull(s): |
| | return False |
| | |
| | s = str(s).strip() |
| |
|
| | |
| | try: |
| | float(s) |
| | return True |
| | except ValueError: |
| | pass |
| |
|
| | |
| | if SPECIAL_ONLY_REGEX.match(s): |
| | return True |
| |
|
| | return False |
| |
|
| | def remove_numeric_or_special_responses(df: pd.DataFrame, target_col: str) -> pd.DataFrame: |
| | """ |
| | Remove rows from the DataFrame where the target column's value is either numeric or |
| | consists solely of special characters. |
| | |
| | Parameters: |
| | df (pd.DataFrame): The input DataFrame. |
| | target_col (str): The name of the column to filter. |
| | |
| | Returns: |
| | pd.DataFrame: A DataFrame with the undesired responses removed. |
| | """ |
| | filtered_df = df[~df[target_col].map(is_numeric_or_special)].reset_index(drop=True) |
| | return filtered_df |
| |
|
| |
|
| | |
| | |
| | |
| |
|
| | import pandas as pd |
| | import datetime |
| | from dateutil import parser |
| |
|
| | def robust_convert_date(date_series): |
| | """ |
| | Convert a pandas Series containing dates in various formats to datetime objects. |
| | |
| | This function tries: |
| | 1. The built-in pd.to_datetime() with infer_datetime_format and dayfirst options. |
| | 2. Falls back to dateutil.parser.parse for any values that remain unparsed. |
| | |
| | Parameters: |
| | date_series (pd.Series): A pandas Series with date values (as strings, numbers, etc.) |
| | |
| | Returns: |
| | pd.Series: A Series of datetime objects (or pd.NaT if conversion fails) |
| | """ |
| | def convert_single(x): |
| | |
| | if pd.isnull(x): |
| | return pd.NaT |
| | if isinstance(x, (pd.Timestamp, datetime.datetime)): |
| | return x |
| | |
| | dt = pd.to_datetime(x, errors='coerce', infer_datetime_format=True, dayfirst=True) |
| | if pd.notnull(dt): |
| | return dt |
| | |
| | try: |
| | return parser.parse(str(x), dayfirst=True) |
| | except Exception: |
| | return pd.NaT |
| |
|
| | return date_series.apply(convert_single) |
| |
|
| |
|