Spaces:
Running
Running
File size: 761 Bytes
492deb9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
from pyspark.sql.functions import col
def apply_masking(df, schema):
    """
    Add a masked counterpart for every column listed in ``schema``.

    ``schema`` maps a source column name to a mask type, i.e.
    ``{"original_col": "mask_type"}``. The recognised mask types are
    "email", "name", "dob", "ssn", "itin" and "phone".

    For each entry whose mask type is recognised, the matching masking UDF is
    applied to the source column and the result is written to a column named
    ``masked_<source column>``. Entries with any other mask type are skipped
    without raising. This function does not drop the source columns.

    Returns the DataFrame produced by the chained ``withColumn`` calls (the
    input ``df`` itself when no entry matched).
    """
    # Function-scope import, kept exactly where the original performs it.
    from .masking import (
        mask_email_udf,
        mask_name_udf,
        mask_date_udf,
        mask_ssn_udf,
        mask_itin_udf,
        mask_phone_udf,
    )

    # Dispatch table: mask type -> UDF that produces the masked value.
    udf_by_type = {
        "email": mask_email_udf,
        "name": mask_name_udf,
        "dob": mask_date_udf,
        "ssn": mask_ssn_udf,
        "itin": mask_itin_udf,
        "phone": mask_phone_udf,
    }

    result = df
    for source_col, kind in schema.items():
        if kind not in udf_by_type:
            # Unrecognised mask type: leave this column alone.
            continue
        mask_udf = udf_by_type[kind]
        result = result.withColumn(f"masked_{source_col}", mask_udf(col(source_col)))
    return result
|