presidio committed on
Commit
f638625
1 Parent(s): ec2ae46

Delete text_analytics_wrapper.py

Browse files
Files changed (1) hide show
  1. text_analytics_wrapper.py +0 -123
text_analytics_wrapper.py DELETED
@@ -1,123 +0,0 @@
1
- import os
2
- from typing import List, Optional
3
- import logging
4
- import dotenv
5
- from azure.ai.textanalytics import TextAnalyticsClient
6
- from azure.core.credentials import AzureKeyCredential
7
-
8
- from presidio_analyzer import EntityRecognizer, RecognizerResult, AnalysisExplanation
9
- from presidio_analyzer.nlp_engine import NlpArtifacts
10
-
11
- logger = logging.getLogger("presidio-streamlit")
12
-
13
class TextAnalyticsWrapper(EntityRecognizer):
    """Presidio recognizer that delegates PII detection to Azure Text Analytics.

    ``analyze`` sends the text to the service through a
    ``TextAnalyticsClient`` and converts every returned PII entity into a
    ``RecognizerResult``. Entity types are reported with the category names
    returned by the service.
    """

    # Imported from the public package namespace instead of the private
    # ``_models`` module. The import stays in the class body so that
    # ``PiiEntityCategory`` remains reachable as a class attribute.
    from azure.ai.textanalytics import PiiEntityCategory

    # Every category value declared by the SDK enum; used as the default list
    # of supported entities when the caller does not supply one.
    TA_SUPPORTED_ENTITIES = [r.value for r in PiiEntityCategory]

    def __init__(
        self,
        supported_entities: Optional[List[str]] = None,
        supported_language: str = "en",
        ta_client: Optional[TextAnalyticsClient] = None,
        ta_key: Optional[str] = None,
        ta_endpoint: Optional[str] = None,
    ):
        """
        Wrapper for the Azure Text Analytics client
        :param supported_entities: entity categories this recognizer may
            return; defaults to ``TA_SUPPORTED_ENTITIES`` when empty or None
        :param supported_language: language code passed to the service on
            every request
        :param ta_client: object of type TextAnalyticsClient; when omitted, a
            client is built from ``ta_key`` and ``ta_endpoint``
        :param ta_key: Azure cognitive Services for Language key
        :param ta_endpoint: Azure cognitive Services for Language endpoint
        """

        if not supported_entities:
            supported_entities = self.TA_SUPPORTED_ENTITIES

        super().__init__(
            supported_entities=supported_entities,
            supported_language=supported_language,
            name="Azure Text Analytics PII",
        )

        self.ta_key = ta_key
        self.ta_endpoint = ta_endpoint

        # Only build a client when the caller did not inject one.
        if not ta_client:
            ta_client = self.__authenticate_client(ta_key, ta_endpoint)
        self.ta_client = ta_client

    @staticmethod
    def __authenticate_client(key: str, endpoint: str):
        """Build a ``TextAnalyticsClient`` for ``endpoint`` using ``key``.

        :param key: key wrapped in an ``AzureKeyCredential``
        :param endpoint: endpoint passed to ``TextAnalyticsClient``
        :return: the newly created client
        """
        return TextAnalyticsClient(
            endpoint=endpoint, credential=AzureKeyCredential(key)
        )

    def analyze(
        self,
        text: str,
        entities: Optional[List[str]] = None,
        nlp_artifacts: Optional[NlpArtifacts] = None,
    ) -> List[RecognizerResult]:
        """Detect PII entities in ``text`` using the Text Analytics service.

        :param text: text to analyze; sent to the service as a single document
        :param entities: optional list of categories to return. When empty or
            None, every category in ``self.supported_entities`` is returned.
        :param nlp_artifacts: not used by this recognizer
        :return: one ``RecognizerResult`` per accepted entity. ``start`` is the
            entity offset reported by the service and ``end`` is that offset
            plus the length of the entity text.
        """
        response = self.ta_client.recognize_pii_entities(
            [text], language=self.supported_language
        )

        recognizer_results = []
        for doc in response:
            if doc.is_error:
                # Error documents carry no entities; skip them as before, but
                # leave a trace instead of dropping them silently.
                logger.warning(
                    "Text Analytics returned an error document: %s",
                    getattr(doc, "error", doc),
                )
                continue

            for entity in doc.entities:
                category = entity.category
                if category not in self.supported_entities:
                    continue
                # Honor the caller's filter; previously ``entities`` was
                # accepted but never applied.
                if entities and category not in entities:
                    continue

                analysis_explanation = TextAnalyticsWrapper._build_explanation(
                    original_score=entity.confidence_score,
                    entity_type=category,
                )
                recognizer_results.append(
                    RecognizerResult(
                        entity_type=category,
                        start=entity.offset,
                        end=entity.offset + len(entity.text),
                        score=entity.confidence_score,
                        analysis_explanation=analysis_explanation,
                    )
                )

        return recognizer_results

    @staticmethod
    def _build_explanation(
        original_score: float, entity_type: str
    ) -> AnalysisExplanation:
        """Create the ``AnalysisExplanation`` attached to a result.

        :param original_score: confidence score reported for the entity
        :param entity_type: category name used in the textual explanation
        :return: explanation naming this class as the recognizer
        """
        return AnalysisExplanation(
            # ``TextAnalyticsWrapper.__class__`` is the metaclass, so its
            # ``__name__`` is not this class's name; use the class itself.
            recognizer=TextAnalyticsWrapper.__name__,
            original_score=original_score,
            textual_explanation=f"Identified as {entity_type} by Text Analytics",
        )

    def load(self) -> None:
        """Do nothing; this recognizer has nothing to load here."""
        pass
99
-
100
-
101
if __name__ == "__main__":
    # Manual smoke test: build an analyzer backed by Azure Text Analytics and
    # run it over a sample text. Reads TA_KEY and TA_ENDPOINT from the
    # environment after loading a .env file via dotenv.
    import presidio_helpers

    dotenv.load_dotenv()

    # Sample text reproduced as it appears in the source; leading whitespace
    # inside the literal is part of the analyzed text.
    text = """
Here are a few example sentences we currently support:

Hello, my name is David Johnson and I live in Maine.
My credit card number is 4095-2609-9393-4932 and my crypto wallet id is 16Yeky6GMjeNkAiNcBY7ZhrLoMSgg1BoyZ.

On September 18 I visited microsoft.com and sent an email to test@presidio.site, from the IP 192.168.0.1.

My passport: 191280342 and my phone number: (212) 555-1234.

This is a valid International Bank Account Number: IL150120690000003111111 . Can you please check the status on bank account 954567876544?

Kate's social security number is 078-05-1126. Her driver license? it is 1234567A.
"""
    analyzer = presidio_helpers.analyzer_engine(
        model_path="Azure Text Analytics PII",
        ta_key=os.environ["TA_KEY"],
        ta_endpoint=os.environ["TA_ENDPOINT"],
    )

    # Show the findings; previously the return value was discarded, so the
    # script produced no visible output.
    for result in analyzer.analyze(text=text, language="en"):
        print(result)