# Spaces:
# Sleeping
# Sleeping
import configparser

import numpy as np
import torch
from transformers import (
    AutoModelForTokenClassification,
    AutoTokenizer,
    TokenClassificationPipeline,
)
from transformers.pipelines import AggregationStrategy
# Load the project configuration once, at import time. The path is relative,
# so it is resolved against the current working directory of the process.
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of
# files it actually parsed, so a missing file does not raise by itself.
_parsed_files = config.read("src/configs/config.cfg")
if "EMBEDDINGS" not in config:
    # Still a KeyError (what the plain section lookup would raise), but with
    # a message that tells a missing file apart from a missing section.
    _reason = (
        "the file has no [EMBEDDINGS] section"
        if _parsed_files
        else "the file could not be read (check the working directory)"
    )
    raise KeyError(f"EMBEDDINGS: {_reason}: src/configs/config.cfg")
# Section proxy holding the embedding-related settings.
embed_config = config["EMBEDDINGS"]
class KeyphraseExtractionPipeline(TokenClassificationPipeline):
    """Token-classification pipeline that returns unique keyphrases.

    The model and tokenizer are both loaded from the checkpoint named by
    ``embed_config["KEYWORD_EXTRACTOR"]``.
    """

    def __init__(self, *, device=None):
        """Load the keyphrase model and tokenizer and build the pipeline.

        Args:
            device: Optional device forwarded to the base pipeline (for
                example ``-1`` for CPU or ``0`` for the first CUDA device).
                When omitted, the first CUDA device is used if one is
                available, otherwise the CPU.
        """
        model_name = str(embed_config["KEYWORD_EXTRACTOR"])
        if device is None:
            # Integer form: a negative value selects the CPU, N selects cuda:N.
            device = 0 if torch.cuda.is_available() else -1
        super().__init__(
            model=AutoModelForTokenClassification.from_pretrained(model_name),
            # A tokenizer is not placed on a device, so no device argument is
            # given here; placement is handled by the pipeline's ``device``.
            tokenizer=AutoTokenizer.from_pretrained(model_name),
            device=device,
        )

    def postprocess(self, all_outputs):
        """Aggregate token predictions and return the distinct keyphrases.

        Args:
            all_outputs: Model outputs handed over by the base pipeline.

        Returns:
            A NumPy array with the unique, whitespace-stripped ``"word"``
            values of the aggregated entities (``np.unique`` also sorts them).
        """
        results = super().postprocess(
            all_outputs=all_outputs,
            aggregation_strategy=AggregationStrategy.FIRST,
        )
        return np.unique([result.get("word").strip() for result in results])