| # parallel_utils.py | |
| from joblib import Parallel, delayed | |
| import pandas as pd | |
| from .data_augmentation import generate_expression_instructions, generate_expression_instruction | |
def _extract_instruction(item):
    """Return the instruction text from one generator result.

    The correctly spelled ``"instruction"`` key is preferred. The misspelled
    ``"instriction"`` key that this module historically read is still accepted
    as a fallback, so results from either spelling keep working.

    Raises:
        KeyError: If neither key is present in ``item``.
    """
    for key in ("instruction", "instriction"):
        if key in item:
            return item[key]
    raise KeyError(
        "generator result has no 'instruction' key; "
        f"available keys: {sorted(map(str, item))}"
    )


def augment_dataframe_parallel(
    df: pd.DataFrame,
    expression_col: str = "expression",
    n_jobs: int = -1,
) -> pd.DataFrame:
    """
    Parallelized augmentation of a DataFrame with math expressions.

    Each value of ``expression_col`` is passed to
    ``generate_expression_instruction`` through joblib, and the instruction
    text from each result is stored in a new ``"instruction"`` column.

    Args:
        df (pd.DataFrame): DataFrame with a column of expressions.
        expression_col (str): Name of the column with expressions.
        n_jobs (int): Number of parallel workers (-1 = all cores).

    Returns:
        pd.DataFrame: Copy of ``df`` with an added ``"instruction"`` column;
        the input DataFrame is not modified.

    Raises:
        KeyError: If ``expression_col`` is not a column of ``df``, or if a
            generator result carries no instruction entry.
    """
    expressions = df[expression_col].tolist()

    # One task per expression; the results are later assigned positionally,
    # so they are expected to come back in input order.
    augmented_data = Parallel(n_jobs=n_jobs)(
        delayed(generate_expression_instruction)(expr) for expr in expressions
    )

    # Work on a copy so the caller's DataFrame is left untouched.
    df_aug = df.copy()
    df_aug["instruction"] = [_extract_instruction(item) for item in augmented_data]

    # Per-style columns are currently disabled; the keys below are not
    # guaranteed to exist in the results of generate_expression_instruction.
    #df_aug["simple"] = [item["Simple_Instruct"] for item in augmented_data]
    #df_aug["key_value"] = [item["Key_Value"] for item in augmented_data]
    #df_aug["delimiter"] = [item["Delimiter_Based"] for item in augmented_data]
    #df_aug["minimalist"] = [item["Minimalist"] for item in augmented_data]
    return df_aug