import streamlit as st
from annotated_text import annotated_text
from refined.inference.processor import Refined
import requests
import json

# Seconds to wait for the WordLift API before giving up on an entity lookup.
REQUEST_TIMEOUT_SECONDS = 10

# Highlight colour used for each entity "@type" returned by the WordLift API.
ENTITY_TYPE_COLORS = {
    "Place": "#8AC7DB",
    "Organization": "#ADD8E6",
    "Person": "#67B7D1",
    "Product": "#2ea3f2",
    "CreativeWork": "#00BFFF",
    "Event": "#1E90FF",
}
DEFAULT_ENTITY_COLOR = "#8ef"

# Sidebar
st.sidebar.image("logo-wordlift.png")

# Initiate the model
# Lists (not sets) keep the option order, and therefore the default selection,
# stable across interpreter restarts.
model_options = ["aida_model", "wikipedia_model_with_numbers"]
selected_model_name = st.sidebar.selectbox("Select the Model", model_options)

# Select entity_set
entity_set_options = ["wikidata", "wikipedia"]
selected_entity_set = st.sidebar.selectbox("Select the Entity Set", entity_set_options)


@st.cache_resource  # Cache the loaded model across Streamlit reruns
def load_model(model_name, entity_set):
    """Load the pretrained ReFinED model for the given model name and entity set.

    Args:
        model_name: Name passed to ``Refined.from_pretrained`` as ``model_name``.
        entity_set: Name passed to ``Refined.from_pretrained`` as ``entity_set``.

    Returns:
        The object returned by ``Refined.from_pretrained``.
    """
    refined_model = Refined.from_pretrained(model_name=model_name,
                                            entity_set=entity_set)
    return refined_model


# Use the cached model
refined_model = load_model(selected_model_name, selected_entity_set)

# Add citation
citation = """
@inproceedings{ayoola-etal-2022-refined,
  title = "{R}e{F}in{ED}: An Efficient Zero-shot-capable Approach to End-to-End Entity Linking",
  author = "Tom Ayoola, Shubhi Tyagi, Joseph Fisher, Christos Christodoulopoulos, Andrea Pierleoni",
  booktitle = "NAACL",
  year = "2022"
}
"""

with st.sidebar.expander('Citations'):
    st.markdown(citation)


# Helper functions
def get_wikidata_id(entity_string):
    """Extract the entity id that follows the first ``=`` in ``entity_string``.

    Args:
        entity_string: Text of the form ``<anything>=<id>``.

    Returns:
        A dict with ``"id"`` (the text between the first and second ``=``) and
        ``"link"`` (that id appended to ``http/www.wikidata.org/entity/``).

    Raises:
        IndexError: If ``entity_string`` contains no ``=``.
    """
    entity_list = entity_string.split("=")
    entity_id = str(entity_list[1])
    # NOTE(review): the missing "://" looks intentional - the link is used as
    # a path segment of the WordLift API URL below. Confirm before "fixing".
    entity_link = "http/www.wikidata.org/entity/" + entity_id
    return {"id": entity_id, "link": entity_link}


def get_entity_data(entity_link):
    """Fetch the JSON description of an entity from the WordLift API.

    This is a best-effort lookup: network failures, timeouts, HTTP error
    statuses and non-JSON bodies are printed and reported as ``None`` so that
    one failing entity does not abort the whole analysis.

    Args:
        entity_link: Link appended to ``https://api.wordlift.io/id/``.

    Returns:
        The decoded JSON body, or ``None`` if the lookup failed.
    """
    try:
        response = requests.get(f'https://api.wordlift.io/id/{entity_link}',
                                timeout=REQUEST_TIMEOUT_SECONDS)
        # Do not treat an error payload (4xx/5xx) as entity data.
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Exception when fetching data for entity: {entity_link}. Exception: {e}")
        return None


def _color_for_type(entity_type):
    """Return the highlight colour for an entity ``@type`` value.

    ``entity_type`` may be a string, a list of strings or ``None``; anything
    without a known colour gets ``DEFAULT_ENTITY_COLOR``.
    """
    if isinstance(entity_type, str):
        return ENTITY_TYPE_COLORS.get(entity_type, DEFAULT_ENTITY_COLOR)
    if isinstance(entity_type, (list, tuple)):
        for candidate in entity_type:
            if isinstance(candidate, str) and candidate in ENTITY_TYPE_COLORS:
                return ENTITY_TYPE_COLORS[candidate]
    return DEFAULT_ENTITY_COLOR


def build_annotated_segments(text, annotations):
    """Split ``text`` into plain strings and ``(mention, label, color)`` tuples.

    Each mention is located in the *original* text (first occurrence that does
    not overlap an already placed mention), so no markup is ever inserted into
    the text and nothing has to be parsed back or evaluated.

    Args:
        text: The text entered by the user.
        annotations: Mapping of mention text to a ``(label, color)`` pair.

    Returns:
        A list, in reading order, of plain-text strings and annotation tuples
        suitable for ``annotated_text(*segments)``.
    """
    spans = []
    for mention, (label, color) in annotations.items():
        if not mention:
            continue
        start = text.find(mention)
        while start != -1:
            end = start + len(mention)
            if all(end <= s or start >= e for s, e, _ in spans):
                spans.append((start, end, (mention, label, color)))
                break
            start = text.find(mention, start + 1)

    spans.sort(key=lambda span: span[0])

    segments = []
    cursor = 0
    for start, end, annotation in spans:
        if start > cursor:
            segments.append(text[cursor:start])
        segments.append(annotation)
        cursor = end
    if cursor < len(text):
        segments.append(text[cursor:])
    return segments


# Create the form
with st.form(key='my_form'):
    text_input = st.text_area(label='Enter a sentence')
    submit_button = st.form_submit_button(label='Analyze')

# Process the text and extract the entities
if text_input:
    entities = refined_model.process_text(text_input)

    entities_map = {}
    entities_data = {}
    for entity in entities:
        # The string form of each result is parsed as a bracketed,
        # comma-separated list: field 0 is the mention text and field 1 is
        # expected to carry "wikidata...=<id>".
        # NOTE(review): this depends on the exact repr produced by ReFinED -
        # confirm, or read the attributes directly if the API exposes them.
        fields = str(entity).strip('][').replace("'", "").split(', ')
        if len(fields) < 2 or "wikidata" not in fields[1]:
            continue
        mention = fields[0].strip()
        entities_map[mention] = get_wikidata_id(fields[1])
        entity_data = get_entity_data(entities_map[mention]["link"])
        if entity_data is not None:
            entities_data[mention] = entity_data

    # mention -> [wikidata info, WordLift data or None]
    combined_entity_info_dictionary = {
        mention: [info, entities_data.get(mention)]
        for mention, info in entities_map.items()
    }

    if submit_button:
        # JSON-LD data
        json_ld_data = {
            "@context": "https://schema.org",
            "@type": "WebPage",
            "mentions": []
        }

        # Collect the label/colour of every mention and the JSON-LD payload
        annotations = {}
        for entity_string, entity_info in entities_map.items():
            entity_data = entities_data.get(entity_string)
            entity_type = None
            if isinstance(entity_data, dict):
                entity_type = entity_data.get("@type")

            # Use different colors based on the entity's type
            annotations[entity_string] = (entity_info["id"],
                                          _color_for_type(entity_type))

            # Add the entity to JSON-LD data; entities whose lookup failed
            # have no payload and are left out instead of emitting null.
            if entity_data is not None:
                json_ld_data["mentions"].append(entity_data)

        # Build the annotated output directly from the user's text. The user
        # text is never evaluated as code.
        final_text = build_annotated_segments(text_input, annotations)

        # Pass the final_text to the annotated_text function
        annotated_text(*final_text)

        with st.expander("See annotations"):
            st.write(combined_entity_info_dictionary)

        with st.expander("Here is the final JSON-LD"):
            st.json(json_ld_data)  # Output JSON-LD