JonathanEGP committed on
Commit
a2ee67b
·
verified ·
1 Parent(s): 54741b9

Update Anonimizador_Ner.txt

Browse files
Files changed (1) hide show
  1. Anonimizador_Ner.txt +42 -20
Anonimizador_Ner.txt CHANGED
@@ -1,20 +1,42 @@
1
- from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
2
-
3
class AnonymizationPipeline:
    """Replace the entities found by a token-classification model with tags.

    The model and tokenizer are loaded with ``from_pretrained`` and wrapped in
    a ``"ner"`` pipeline; ``anonymize`` substitutes every detected span with
    its label in square brackets.
    """

    # Checkpoint used when the caller does not name one.
    DEFAULT_MODEL = "JonathanEGP/Beto_Ner"

    def __init__(self, model_name="JonathanEGP/Beto_Ner"):
        """Load the model and tokenizer and build the NER pipeline.

        Args:
            model_name: Checkpoint identifier passed to ``from_pretrained``.
                A falsy value falls back to ``DEFAULT_MODEL``.
        """
        # The argument used to be accepted but silently ignored.
        checkpoint = model_name or self.DEFAULT_MODEL
        self.model = AutoModelForTokenClassification.from_pretrained(checkpoint)
        self.tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        self.ner_pipeline = pipeline("ner", model=self.model, tokenizer=self.tokenizer)

    def anonymize(self, text):
        """Return ``text`` with every detected entity replaced by ``[LABEL]``.

        Args:
            text: The string to anonymize.

        Returns:
            The anonymized string; ``text`` unchanged if no entity is found.

        Raises:
            KeyError: If an entity has neither an ``entity_group`` nor an
                ``entity`` key, or lacks ``start``/``end``.
        """
        # Work from the end of the string backwards so that replacing one span
        # does not shift the offsets of the spans still to be processed.
        # sorted() leaves the list returned by the pipeline untouched.
        entities = sorted(
            self.ner_pipeline(text), key=lambda item: item["end"], reverse=True
        )

        for entity in entities:
            # Aggregated pipeline output uses "entity_group"; raw token-level
            # output uses "entity".
            if "entity_group" in entity:
                label = entity["entity_group"]
            else:
                label = entity["entity"]
            text = text[:entity["start"]] + f"[{label}]" + text[entity["end"]:]

        return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import Pipeline, AutoModelForTokenClassification, AutoTokenizer
2
+
3
class AnonymizationPipeline(Pipeline):
    """Custom pipeline that replaces detected entities with bracketed labels.

    Entity detection is delegated to an inner ``"ner"`` pipeline built from
    this pipeline's model and tokenizer; ``postprocess`` rewrites the text.
    """

    def __init__(self, model=None, tokenizer=None, **kwargs):
        """Initialise the pipeline, loading default weights when none are given.

        Args:
            model: Token-classification model; when ``None`` the
                ``JonathanEGP/Beto_Ner`` checkpoint is loaded.
            tokenizer: Matching tokenizer; when ``None`` it is loaded from the
                same checkpoint.
            **kwargs: Forwarded unchanged to ``Pipeline.__init__``.
        """
        # Only the ``Pipeline`` class is imported at file level; the factory
        # function is needed to build the inner NER pipeline.
        from transformers import pipeline

        # Resolve the defaults BEFORE calling the base initialiser so that it
        # is never handed ``model=None``.
        if model is None:
            model = AutoModelForTokenClassification.from_pretrained("JonathanEGP/Beto_Ner")
        if tokenizer is None:
            tokenizer = AutoTokenizer.from_pretrained("JonathanEGP/Beto_Ner")

        super().__init__(model=model, tokenizer=tokenizer, **kwargs)

        # ``Pipeline`` is the base class, not a task factory: the original
        # ``Pipeline("ner", model=...)`` passed the task string positionally
        # into the model slot. ``pipeline("ner", ...)`` builds the NER pipeline.
        self.ner_pipeline = pipeline("ner", model=self.model, tokenizer=self.tokenizer)

    def _sanitize_parameters(self, **kwargs):
        """Return empty preprocess, forward and postprocess parameter dicts."""
        return {}, {}, {}  # No additional parameters needed for now

    def preprocess(self, text):
        """Wrap the raw input string in a dict for ``_forward``."""
        return {"text": text}

    def _forward(self, model_inputs):
        """Run the inner NER pipeline and return the text with its entities."""
        text = model_inputs["text"]
        entities = self.ner_pipeline(text)
        return {"text": text, "entities": entities}

    def postprocess(self, model_outputs):
        """Replace every entity span in the text with ``[LABEL]``.

        Args:
            model_outputs: Dict with ``"text"`` and ``"entities"`` as produced
                by ``_forward``.

        Returns:
            ``{"anonymized_text": <text with entities replaced>}``.
        """
        text = model_outputs["text"]

        # Process entities from the end of the text to the beginning so the
        # offsets of the remaining entities stay valid; sorted() avoids
        # mutating the list held in ``model_outputs``.
        entities = sorted(
            model_outputs["entities"], key=lambda item: item["end"], reverse=True
        )

        # Replace each entity with its label.
        for entity in entities:
            # Aggregated NER output uses "entity_group"; token-level output
            # uses "entity".
            if "entity_group" in entity:
                label = entity["entity_group"]
            else:
                label = entity["entity"]
            text = text[:entity["start"]] + f"[{label}]" + text[entity["end"]:]

        return {"anonymized_text": text}

    def __call__(self, text, **kwargs):
        """Anonymize ``text`` by running the standard pipeline call."""
        return super().__call__(text, **kwargs)