JonathanEGP
/

Anonimizador_Ner

Inference Endpoints

Model card · Files and versions · Community

JonathanEGP committed on Jul 13, 2024

Commit

a2ee67b

·

verified ·

1 Parent(s): 54741b9

Update Anonimizador_Ner.txt

Files changed (1) hide show

Anonimizador_Ner.txt +42 -20

Anonimizador_Ner.txt CHANGED Viewed

@@ -1,20 +1,42 @@
-from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
-class AnonymizationPipeline:
-    # Plain helper class that replaces every entity found by a "ner"
-    # pipeline with its label in square brackets.
-    def __init__(self, model_name):
-        # NOTE(review): `model_name` is accepted but never used below; the
-        # checkpoint "JonathanEGP/Beto_Ner" is hard-coded for both the model
-        # and the tokenizer.
-        self.model = AutoModelForTokenClassification.from_pretrained("JonathanEGP/Beto_Ner")
-        self.tokenizer = AutoTokenizer.from_pretrained("JonathanEGP/Beto_Ner")
-        self.ner_pipeline = pipeline("ner", model=self.model, tokenizer=self.tokenizer)
-    def anonymize(self, text):
-        # Returns `text` with each detected span replaced by "[<entity>]".
-        # Every entity is read through its 'start', 'end' and 'entity' keys.
-        entities = self.ner_pipeline(text)
-        # Process spans from the end of the text backwards so that earlier
-        # character offsets are still valid after each replacement.
-        entities.sort(key=lambda x: x['end'], reverse=True)
-        for entity in entities:
-            start = entity['start']
-            end = entity['end']
-            entity_type = entity['entity']
-            text = text[:start] + f"[{entity_type}]" + text[end:]
-        return text

+from transformers import AutoModelForTokenClassification, AutoTokenizer, Pipeline, pipeline
+class AnonymizationPipeline(Pipeline):
+    """Custom pipeline that replaces detected entities with their label.
+
+    ``preprocess`` passes the raw text through, ``_forward`` runs an inner
+    "ner" pipeline on it, and ``postprocess`` substitutes every detected
+    span with ``[<entity>]``. The result is ``{"anonymized_text": str}``.
+    """
+
+    def __init__(self, model=None, tokenizer=None, **kwargs):
+        """Create the pipeline, loading the default checkpoint if needed.
+
+        Args:
+            model: Token-classification model. When ``None`` the
+                "JonathanEGP/Beto_Ner" checkpoint is loaded.
+            tokenizer: Tokenizer for ``model``. When ``None`` the
+                "JonathanEGP/Beto_Ner" tokenizer is loaded.
+            **kwargs: Forwarded unchanged to ``Pipeline.__init__``.
+        """
+        # Resolve the defaults BEFORE calling the base initializer, so it
+        # never receives ``model=None``.
+        if model is None:
+            model = AutoModelForTokenClassification.from_pretrained("JonathanEGP/Beto_Ner")
+        if tokenizer is None:
+            tokenizer = AutoTokenizer.from_pretrained("JonathanEGP/Beto_Ner")
+        super().__init__(model=model, tokenizer=tokenizer, **kwargs)
+        # Use the ``pipeline`` factory here: ``Pipeline`` is the base class,
+        # and calling it as ``Pipeline("ner", model=...)`` would bind "ner"
+        # to its ``model`` parameter and clash with the keyword argument.
+        self.ner_pipeline = pipeline("ner", model=self.model, tokenizer=self.tokenizer)
+
+    def _sanitize_parameters(self, **kwargs):
+        """Return the (preprocess, forward, postprocess) parameter dicts.
+
+        No call-time parameters are supported yet, so all three are empty.
+        """
+        return {}, {}, {}
+
+    def preprocess(self, text):
+        """Wrap the raw input text; tokenization is left to the inner pipeline."""
+        return {"text": text}
+
+    def _forward(self, model_inputs):
+        """Run the inner "ner" pipeline and return the text with its entities."""
+        text = model_inputs["text"]
+        entities = self.ner_pipeline(text)
+        return {"text": text, "entities": entities}
+
+    def postprocess(self, model_outputs):
+        """Replace each entity span in the text with ``[<entity>]``.
+
+        Each entity is read through its 'start', 'end' and 'entity' keys.
+
+        Returns:
+            dict: ``{"anonymized_text": <text with spans replaced>}``.
+        """
+        text = model_outputs["text"]
+        # Walk the spans from the end of the text backwards so that earlier
+        # character offsets stay valid after each replacement. ``sorted``
+        # leaves the list held by ``model_outputs`` untouched.
+        ordered = sorted(model_outputs["entities"], key=lambda item: item["end"], reverse=True)
+        for entity in ordered:
+            start = entity["start"]
+            end = entity["end"]
+            text = text[:start] + f"[{entity['entity']}]" + text[end:]
+        return {"anonymized_text": text}
+
+    def __call__(self, text, **kwargs):
+        """Anonymize ``text``; delegates to ``Pipeline.__call__``."""
+        return super().__call__(text, **kwargs)