File size: 1,060 Bytes
e0c264d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# tests/test_schema_masking.py

import sys
import os

# Extend the module search path with a hard-coded directory before the
# `anonyspark` import further down in this file is executed.
# NOTE(review): the absolute path looks environment-specific (presumably a
# notebook/Colab checkout) — confirm it exists wherever these tests run.
sys.path.append("/content/anonyspark")

from pyspark.sql import SparkSession
from anonyspark.utils import apply_masking

def test_schema_masking():
    """Check the ``masked_*`` values ``apply_masking`` yields for one sample row.

    A single-row DataFrame holding an email, name, date of birth, SSN, ITIN
    and phone number is passed to ``apply_masking`` together with a schema
    dict. The first collected row is then compared, column by column,
    against the expected masked strings.
    """
    session_builder = SparkSession.builder.master("local[1]").appName("Test")
    spark = session_builder.getOrCreate()

    sample_record = {
        "email": "john@example.com",
        "name": "John",
        "dob": "1991-08-14",
        "ssn": "123-45-6789",
        "itin": "912-73-1234",
        "phone": "123-456-7890",
    }
    source_df = spark.createDataFrame([sample_record])

    # Every column is paired with a value equal to its own name.
    # NOTE(review): presumably apply_masking reads the value as the masking
    # type for that column — confirm against anonyspark.utils.
    schema = {column: column for column in sample_record}

    first_row = apply_masking(source_df, schema).collect()[0]
    result = first_row.asDict()

    # Checked in insertion order, so the first mismatch reported is the same
    # one a sequence of individual assert statements would hit.
    expected_by_column = {
        "masked_email": "***@example.com",
        "masked_name": "J***",
        "masked_dob": "***-**-14",
        "masked_ssn": "***-**-6789",
        "masked_itin": "***-**-1234",
        "masked_phone": "***-***-7890",
    }
    for column, expected in expected_by_column.items():
        assert result[column] == expected