EddyGiusepe committed on
Commit
7dbdab5
1 Parent(s): b514fe8

NER and logging

Browse files
Files changed (2) hide show
  1. .gitignore +1 -0
  2. 4_Entity_and_logging.py +84 -0
.gitignore CHANGED
@@ -1,2 +1,3 @@
1
  # EddyGiusepe
2
  venv_logging/
 
 
1
  # EddyGiusepe
2
  venv_logging/
3
+ reconhecimento_de_entidade.log
4_Entity_and_logging.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Data Scientist.: Dr.Eddy Giusepe Chirinos Isidro
5
+
6
+ Objetivo: Neste script utilizamos um modelo pré-treinado para extrair
7
+ Entidades e usamos o pacote logging do python para registrar
8
+ nossos LOGs.
9
+ """
10
+ import logging
11
+ from transformers import pipeline
12
+
13
class EntityRecognizer:
    """Extract named entities from text with a pre-trained NER pipeline.

    Every recognition result is also written to the log file
    ``reconhecimento_de_entidade.log`` through the standard ``logging``
    package.
    """

    def __init__(self, model_name="Babelscape/wikineural-multilingual-ner"):
        """Load the NER model and configure the file logger.

        Args:
            model_name: Hugging Face model identifier used for both the model
                and its tokenizer. Default model card:
                https://huggingface.co/Babelscape/wikineural-multilingual-ner
        """
        self.model = self.load_model(model_name)
        self.logger = self.setup_logger()

    def load_model(self, model_name="Babelscape/wikineural-multilingual-ner"):
        """Build and return the Hugging Face ``ner`` pipeline for *model_name*."""
        return pipeline("ner", model=model_name, tokenizer=model_name)

    def setup_logger(self):
        """Return the module logger, configured to write INFO records to a file.

        The logger is shared by every instance (it is looked up by module
        name), so the file handler is attached only once; creating several
        ``EntityRecognizer`` objects therefore does not duplicate log lines.
        """
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.INFO)

        # A previous instance may already have attached the file handler to
        # this shared logger; adding another would repeat every record.
        has_file_handler = any(
            isinstance(handler, logging.FileHandler)
            for handler in logger.handlers
        )
        if not has_file_handler:
            formatter = logging.Formatter(
                '%(asctime)s - %(levelname)s - %(message)s'
            )
            # Explicit UTF-8 so accented entity text is written the same way
            # regardless of the platform's default locale encoding.
            file_handler = logging.FileHandler(
                'reconhecimento_de_entidade.log', encoding='utf-8'
            )
            file_handler.setLevel(logging.INFO)
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)

        return logger

    def recognize_entities(self, text):
        """Run the NER model on *text* and log what it found.

        Args:
            text: Input passed as-is to the loaded pipeline.

        Returns:
            A list of ``(word, entity_tag)`` tuples, one per item returned by
            the pipeline, taken from its ``'word'`` and ``'entity'`` keys.
        """
        entities = self.model(text)
        recognized_entities = [
            (entity['word'], entity['entity']) for entity in entities
        ]

        # Lazy %-formatting: the message is only rendered if the record is
        # actually emitted; the resulting text is unchanged.
        self.logger.info("Entidades reconhecidas: %s", recognized_entities)

        return recognized_entities

    def process_classification_result(self, tokens_and_tags):
        """Group ``B-``/``I-`` tagged tokens into whole entities.

        Args:
            tokens_and_tags: Iterable of ``(token, tag)`` pairs, e.g. the
                output of :meth:`recognize_entities`.

        Returns:
            A dict mapping each entity text (tokens joined by single spaces)
            to its type, i.e. the tag with the ``B-`` prefix stripped. An
            entity text that occurs more than once keeps its last type.
            Tokens whose tag starts with neither ``B-`` nor ``I-`` are
            ignored, as are ``I-`` tokens seen before any ``B-`` token.
        """
        result = {}
        current_type = None
        current_entity = ""

        for token, tag in tokens_and_tags:
            if tag.startswith("B-"):
                # A new entity begins: flush the one being built, if any.
                if current_type is not None and current_entity:
                    result[current_entity] = current_type
                current_type = tag[2:]
                current_entity = token
            elif tag.startswith("I-") and current_type is not None:
                # Continuation of the open entity. An orphan "I-" (no open
                # entity) is skipped; it could never be stored anyway.
                current_entity += " " + token

        # Flush the last open entity.
        if current_type is not None and current_entity:
            result[current_entity] = current_type

        return result
70
+
71
+
72
if __name__ == "__main__":
    # Usage example. To try another checkpoint, pass its name explicitly:
    #   EntityRecognizer("Babelscape/wikineural-multilingual-ner")
    # Alternative sample sentence:
    #   "O Eddwin e a Karina foram para Estados Unidos a estudar em Harvard."
    sample_sentence = "Eddy e Karina compraram uns tênis na loja Nike."

    recognizer = EntityRecognizer()
    tagged_tokens = recognizer.recognize_entities(sample_sentence)
    print(tagged_tokens)
    print("🤗🤗🤗")

    grouped = recognizer.process_classification_result(tagged_tokens)

    # Remove the " ##" separators from the entity names so pieces that were
    # joined with a space read as a single word again.
    cleaned = {}
    for entity_name, entity_type in grouped.items():
        cleaned[entity_name.replace(" ##", "")] = entity_type
    print(cleaned)