File size: 1,299 Bytes
628fe8f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import re
import uuid

def process_tokens(tokens, inverse_uuid_map=None, uuid_map=None, embeddings_model=None, fhe_ner_detection=None, client=None):
    """Process a token stream for anonymization, deanonymization, or pass-through.

    Exactly one mode applies per call, selected by which arguments are supplied:

    * Deanonymize — when ``inverse_uuid_map`` is given, each word token is
      replaced by its mapped original (unknown tokens pass through unchanged).
    * Anonymize (FHE NER) — when ``uuid_map``, ``embeddings_model``,
      ``fhe_ner_detection`` and ``client`` are ALL given, each word token is
      scored by the NER model; tokens classed as entities (probability >= 0.5)
      are replaced by a stable 8-char UUID placeholder recorded in ``uuid_map``.
    * Default — otherwise every token passes through unchanged.

    Args:
        tokens: Iterable of string tokens (words, whitespace, punctuation).
        inverse_uuid_map: Optional dict mapping UUID placeholder -> original text.
        uuid_map: Optional dict mapping original token -> UUID placeholder;
            mutated in place when new placeholders are minted.
        embeddings_model: Word-embedding model exposing ``wv[token]``
            (gensim-style — TODO confirm against caller).
        fhe_ner_detection: Classifier exposing ``predict_proba``; column 1 is
            assumed to be the "is entity" probability.
        client: Unused in the body; participates only in the mode-selection
            guard. Kept for interface compatibility.

    Returns:
        The processed tokens concatenated into a single string.
    """
    processed_tokens = []
    for token in tokens:
        # Whitespace-only or non-word tokens (punctuation etc.) are never
        # anonymized/deanonymized — append them verbatim.
        if not token.strip() or not re.match(r"\w+", token):
            processed_tokens.append(token)
            continue
        if inverse_uuid_map is not None:  # Deanonymizing a response
            processed_tokens.append(inverse_uuid_map.get(token, token))
        elif uuid_map is not None and embeddings_model is not None and fhe_ner_detection is not None and client is not None:
            # FHEAnonymizer call: score the token's embedding with the NER model.
            x = embeddings_model.wv[token][None]
            prediction_proba = fhe_ner_detection.predict_proba(x)
            probability = prediction_proba[0][1]
            if probability >= 0.5:
                # Reuse an existing placeholder so the same entity always maps
                # to the same UUID; mint a new one only when the token is unseen
                # (the original eagerly generated a UUID on every lookup).
                tmp_uuid = uuid_map.get(token)
                if tmp_uuid is None:
                    tmp_uuid = str(uuid.uuid4())[:8]
                    uuid_map[token] = tmp_uuid
                processed_tokens.append(tmp_uuid)
            else:
                processed_tokens.append(token)
        else:
            processed_tokens.append(token)
    return ''.join(processed_tokens)