# Updated NamedEntityRecognitionTool in ner_tool.py

from transformers import pipeline
from transformers import Tool

# Prefix this module treats as marking a continuation piece of the previous
# token (presumably the WordPiece convention of the default "ner" model;
# confirm if a different tokenizer is used).
_SUBWORD_PREFIX = "##"


def _merge_subword_entities(text, entities):
    """Collapse token-level NER predictions into word-level entries.

    Args:
        text: The string that was passed to the NER pipeline.
        entities: Iterable of dicts as produced by the pipeline. The keys
            read here are "entity", "word", "start" and "end"; any of them
            may be missing, and "start"/"end" may be ``None``.

    Returns:
        A list of dicts with keys "word", "label" and "entity_text". A token
        whose "word" starts with ``##`` is appended to the preceding entry
        instead of producing an entry of its own; the merged entry keeps the
        label of its first token.
    """
    merged = []
    # Parallel to ``merged``: the (start, end) character span of each entry,
    # or None when the offsets were not usable.
    spans = []

    for entity in entities:
        label = entity.get("entity", "UNKNOWN")
        token = entity.get("word", "")
        start = entity.get("start")
        end = entity.get("end")
        # Offsets can be absent or None; slicing with None would silently
        # return the whole text, so validate before using them.
        has_span = start is not None and end is not None and 0 <= start <= end

        is_subword = token.startswith(_SUBWORD_PREFIX)
        piece = token[len(_SUBWORD_PREFIX):] if is_subword else token

        if is_subword and merged:
            prev_span = spans[-1]
            both_spans = has_span and prev_span is not None
            # Only glue the piece on when it directly follows the previous
            # entry; without offsets there is nothing to check, so trust the
            # "##" marker.
            if not both_spans or prev_span[1] == start:
                previous = merged[-1]
                previous["word"] += piece
                if both_spans:
                    spans[-1] = (prev_span[0], end)
                    previous["entity_text"] = text[prev_span[0]:end].strip()
                else:
                    spans[-1] = None
                    previous["entity_text"] = previous["word"]
                continue

        # A regular token, or a continuation piece with nothing to attach to:
        # start a new word-level entry.
        entity_text = text[start:end].strip() if has_span else piece
        merged.append({"word": piece, "label": label, "entity_text": entity_text})
        spans.append((start, end) if has_span else None)

    return merged


class NamedEntityRecognitionTool(Tool):
    """Tool that runs a ``transformers`` "ner" pipeline over a text.

    Calling the tool returns (and prints) the recognised entities with
    ``##``-prefixed sub-tokens merged into the word they continue.
    """

    name = "ner_tool"
    description = "Identifies and labels various entities in a given text."
    inputs = ["text"]
    outputs = ["text"]

    def __call__(self, text: str):
        """Run named entity recognition on ``text``.

        Args:
            text: The input string to analyse.

        Returns:
            ``{"entities": [...]}`` where each item is a dict with the keys
            "word", "label" and "entity_text".
        """
        # Build the pipeline once per tool instance instead of on every call.
        ner_analyzer = getattr(self, "_ner_analyzer", None)
        if ner_analyzer is None:
            ner_analyzer = pipeline("ner")
            self._ner_analyzer = ner_analyzer

        entities = ner_analyzer(text)
        word_entities = _merge_subword_entities(text, entities)

        # Print the identified word-level entities
        print(f"Word-level Entities: {word_entities}")

        # Return a dictionary with the specified output component
        return {"entities": word_entities}