"""Gradio demo that runs Swedish named-entity recognition on user text."""

from functools import lru_cache

# Models from https://huggingface.co/models
# https://huggingface.co/KBLab/bert-base-swedish-cased-ner
MODEL_NAME = 'KBLab/bert-base-swedish-cased-ner'


def merge_split_tokens(tokens):
    """Merge sub-word tokens whose ``word`` starts with ``'##'``.

    A token whose word begins with ``'##'`` is treated as the continuation
    of the previously kept token: the ``'##'`` prefix is dropped and the
    rest is appended to that token's ``word``. All other fields of the
    merged entry are those of the first piece.

    Args:
        tokens: Iterable of dicts, each with at least a ``"word"`` key.

    Returns:
        A new list of new dicts; the input dicts are not modified.
    """
    merged_tokens = []
    for token in tokens:
        word = token["word"]
        # A continuation piece with nothing before it cannot be merged, so
        # it is kept unchanged instead of indexing into an empty list.
        # NOTE(review): a '##' piece is attached to the previously kept
        # token even if the two were not adjacent in the text - confirm
        # whether adjacency should be checked.
        if word.startswith('##') and merged_tokens:
            merged_tokens[-1]["word"] += word[2:]
        else:
            # Copy so that merging never edits the caller's dicts.
            merged_tokens.append(dict(token))
    return merged_tokens


@lru_cache(maxsize=1)
def _get_ner_pipeline():
    """Build the NER pipeline on first use and reuse it afterwards."""
    # Imported lazily so that importing this module does not require
    # transformers; the pipeline is created once instead of per request.
    from transformers import pipeline

    return pipeline('ner', model=MODEL_NAME, tokenizer=MODEL_NAME)


def process_swedish_text(text):
    """Run named-entity recognition on ``text``.

    Args:
        text: The text to analyse.

    Returns:
        A dict ``{'entities': [...]}`` where each entity is a pipeline
        result with split tokens merged and ``score`` converted to a
        plain ``float``. Empty or whitespace-only input yields an empty
        entity list without invoking the model.
    """
    if not text or not text.strip():
        return {'entities': []}

    nlp = _get_ner_pipeline()

    # Run NER
    nlp_results = nlp(text)
    print('nlp_results:', nlp_results)

    nlp_results_merged = merge_split_tokens(nlp_results)

    # Fix TypeError("'numpy.float32' object is not iterable"): convert the
    # score to a built-in float. A list (not a lazy map) is built so the
    # debug print below shows the actual data.
    nlp_results_adjusted = [
        {**entity, 'score': float(entity['score'])}
        for entity in nlp_results_merged
    ]
    print('nlp_results_adjusted:', nlp_results_adjusted)

    # Return values
    return {'entities': nlp_results_adjusted}


def build_interface():
    """Create the Gradio interface that serves ``process_swedish_text``."""
    # Imported lazily so the helpers above can be imported without gradio.
    import gradio

    return gradio.Interface(
        fn=process_swedish_text,
        inputs="text",
        outputs="json",
        examples=[
            ["Jag heter Tom och bor i Stockholm."],
            ["Groens malmgård är en av Stockholms malmgårdar, belägen vid Malmgårdsvägen 53 på Södermalm i Stockholm."]
        ],
        title="Swedish Entity Recognition",
        description="Recognizing Swedish tokens e.g. locations and person names.",
        article="© Tom Söderlund 2022"
    )


# Only start the web server when executed as a script, not on import.
if __name__ == "__main__":
    gradio_interface = build_interface()
    gradio_interface.launch()