"""Streamlit app: run a Hugging Face NER model and display merged entities."""

# Hub identifier of the token-classification model served by this app.
MODEL_NAME = "Beehzod/smart-finetuned-ner"

# Prefix the tokenizer puts on a piece that continues the previous token.
SUBWORD_PREFIX = "##"


def _entity_type(label):
    """Return *label* without its BIO prefix, e.g. ``"B-PER"`` -> ``"PER"``."""
    prefix, sep, rest = label.partition("-")
    return rest if sep and prefix in ("B", "I") else label


def merge_entities(entities):
    """Combine token-level NER predictions into whole entities.

    Args:
        entities: Iterable of dicts with the keys ``"word"``, ``"entity"``
            and ``"score"``, and optionally ``"start"`` / ``"end"``
            character offsets (missing or ``None`` offsets are tolerated).

    Returns:
        A list of dicts with the keys ``"word"``, ``"entity"``, ``"score"``,
        ``"start"`` and ``"end"``. ``"entity"`` is the label of the first
        token of the group, ``"score"`` is the mean score of the grouped
        tokens (a plain ``float``), and ``"start"`` / ``"end"`` span the
        first and last grouped token.

    A token is appended to the entity being built when both share the same
    entity type (label without the ``B-``/``I-`` prefix) and the token is
    either a ``##`` subword piece or does not carry a ``B-`` label.
    Anything else starts a new entity, including a leading ``I-`` token.
    """
    merged = []

    for token in entities:
        word = token["word"]
        is_subword = word.startswith(SUBWORD_PREFIX)
        if is_subword:
            # Strip only the leading marker, not "##" elsewhere in the text.
            word = word[len(SUBWORD_PREFIX):]

        label = token["entity"]
        start = token.get("start")
        end = token.get("end")
        score = float(token["score"])  # also normalises numpy scalars

        current = merged[-1] if merged else None
        continues = (
            current is not None
            and _entity_type(label) == _entity_type(current["entity"])
            and (is_subword or not label.startswith("B-"))
        )

        if not continues:
            merged.append(
                {
                    "word": word,
                    "entity": label,
                    "score": score,
                    "start": start,
                    "end": end,
                    "count": 1,  # helper for averaging, removed below
                }
            )
            continue

        # Separate whole words with a space. When offsets are available,
        # only do so if the source text actually had a gap between tokens.
        touching = (
            start is not None
            and current["end"] is not None
            and start <= current["end"]
        )
        if not is_subword and not touching:
            current["word"] += " "
        current["word"] += word
        current["end"] = end
        current["score"] += score
        current["count"] += 1

    for entity in merged:
        entity["score"] /= entity.pop("count")

    return merged


def load_ner_pipeline():
    """Load the NER pipeline for ``MODEL_NAME`` from the Hugging Face Hub."""
    # Imported lazily: transformers is heavy and only needed when the app runs.
    from transformers import pipeline

    return pipeline("ner", model=MODEL_NAME)


def main():
    """Render the page: text input, NER inference, merged entity output."""
    # Imported here so that merge_entities can be imported (and tested)
    # without the UI dependency installed.
    import streamlit as st

    st.title('Named Entity Recognition (NER) with Streamlit')

    # Streamlit re-executes this script on every interaction; caching the
    # loader keeps a single model instance instead of reloading each rerun.
    ner_pipeline = st.cache_resource(load_ner_pipeline)()

    text = st.text_area('Enter text for NER analysis:')

    # Run NER model and merge results
    if text:
        results = ner_pipeline(text)
        merged_results = merge_entities(results)

        # Display results
        for entity in merged_results:
            st.write(f"Entity: {entity['word']}, Label: {entity['entity']}, Score: {entity['score']:.4f}")
            st.json(entity)


# `streamlit run` executes the script as ``__main__``, so the app still
# starts there, while a plain import has no side effects.
if __name__ == "__main__":
    main()