Spaces:

WordLift
/

entity-linking

Running

App Files Files

cyberandy committed on Oct 17, 2023

Commit

117cafd

1 Parent(s): 84d96ae

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -54

app.py CHANGED Viewed

@@ -3,6 +3,11 @@ from annotated_text import annotated_text
 from refined.inference.processor import Refined
 import requests
 import json
 # Page config
 st.set_page_config(
@@ -18,6 +23,8 @@ st.set_page_config(
 # Sidebar
 st.sidebar.image("logo-wordlift.png")
 # Initiate the model
 model_options = {"aida_model", "wikipedia_model_with_numbers"}
@@ -68,61 +75,71 @@ with st.form(key='my_form'):
     text_input = st.text_area(label='Enter a sentence')
     submit_button = st.form_submit_button(label='Analyze')
-# Process the text and extract the entities
 if text_input:
-    entities = refined_model.process_text(text_input)
-    entities_map = {}
-    entities_data = {}
-    for entity in entities:
-        single_entity_list = str(entity).strip('][').replace("\'", "").split(', ')
-        if len(single_entity_list) >= 2 and "wikidata" in single_entity_list[1]:
-            entities_map[single_entity_list[0].strip()] = get_wikidata_id(single_entity_list[1])
-            entity_data = get_entity_data(entities_map[single_entity_list[0].strip()]["link"])
-            if entity_data is not None:
-                entities_data[single_entity_list[0].strip()] = entity_data
-    combined_entity_info_dictionary = dict([(k, [entities_map[k], entities_data[k] if k in entities_data else None]) for k in entities_map])
-    if submit_button:
-        # Prepare a list to hold the final output
-        final_text = []
-        # JSON-LD data
-        json_ld_data = {
-            "@context": "https://schema.org",
-            "@type": "WebPage",
-            "mentions": []
-        }
-        # Replace each entity in the text with its annotated version
-        for entity_string, entity_info in entities_map.items():
-            entity_data = entities_data.get(entity_string, None)
-            entity_type = None
-            if entity_data is not None:
-                entity_type = entity_data.get("@type", None)
-            # Use different colors based on the entity's type
-            color = "#8ef"  # Default color
-            if entity_type == "Place":
-                color = "#8AC7DB"
-            elif entity_type == "Organization":
-                color = "#ADD8E6"
-            elif entity_type == "Person":
-                color = "#67B7D1"
-            elif entity_type == "Product":
-                color = "#2ea3f2"
-            elif entity_type == "CreativeWork":
-                color = "#00BFFF"
-            elif entity_type == "Event":
-                color = "#1E90FF"
-            entity_annotation = (entity_string, entity_info["id"], color)
-            text_input = text_input.replace(entity_string, f'{{{str(entity_annotation)}}}', 1)
-            # Add the entity to JSON-LD data
-            entity_json_ld = combined_entity_info_dictionary[entity_string][1]
-            json_ld_data["mentions"].append(entity_json_ld)
         # Split the modified text_input into a list
         text_list = text_input.split("{")

 from refined.inference.processor import Refined
 import requests
 import json
+import spacy
+# Load German model
+nlp_model_de = spacy.load("de_core_news_sm")
+nlp_model_de.add_pipe("entityfishing", config={"language": "de"})
 # Page config
 st.set_page_config(
 # Sidebar
 st.sidebar.image("logo-wordlift.png")
+language_options = {"English", "German"}
+selected_language = st.sidebar.selectbox("Select the Language", list(language_options))
 # Initiate the model
 model_options = {"aida_model", "wikipedia_model_with_numbers"}
     text_input = st.text_area(label='Enter a sentence')
     submit_button = st.form_submit_button(label='Analyze')
 if text_input:
+    if selected_language == "German":
+        doc_de = nlp_model_de(text_input)
+        entities = [(ent.text, ent.label_, ent._.kb_qid, ent._.url_wikidata) for ent in doc_de.ents]
+        # You will have to adjust the rest of the code since the format is different
+        # For the demo, we'll simply print them for now
+        for entity in entities:
+            st.write(entity)
+    else:
+        entities = refined_model.process_text(text_input)
+        entities = refined_model.process_text(text_input)
+        entities_map = {}
+        entities_data = {}
+        for entity in entities:
+            single_entity_list = str(entity).strip('][').replace("\'", "").split(', ')
+            if len(single_entity_list) >= 2 and "wikidata" in single_entity_list[1]:
+                entities_map[single_entity_list[0].strip()] = get_wikidata_id(single_entity_list[1])
+                entity_data = get_entity_data(entities_map[single_entity_list[0].strip()]["link"])
+                if entity_data is not None:
+                    entities_data[single_entity_list[0].strip()] = entity_data
+        combined_entity_info_dictionary = dict([(k, [entities_map[k], entities_data[k] if k in entities_data else None]) for k in entities_map])
+        if submit_button:
+            # Prepare a list to hold the final output
+            final_text = []
+            # JSON-LD data
+            json_ld_data = {
+                "@context": "https://schema.org",
+                "@type": "WebPage",
+                "mentions": []
+            }
+            # Replace each entity in the text with its annotated version
+            for entity_string, entity_info in entities_map.items():
+                entity_data = entities_data.get(entity_string, None)
+                entity_type = None
+                if entity_data is not None:
+                    entity_type = entity_data.get("@type", None)
+                # Use different colors based on the entity's type
+                color = "#8ef"  # Default color
+                if entity_type == "Place":
+                    color = "#8AC7DB"
+                elif entity_type == "Organization":
+                    color = "#ADD8E6"
+                elif entity_type == "Person":
+                    color = "#67B7D1"
+                elif entity_type == "Product":
+                    color = "#2ea3f2"
+                elif entity_type == "CreativeWork":
+                    color = "#00BFFF"
+                elif entity_type == "Event":
+                    color = "#1E90FF"
+                entity_annotation = (entity_string, entity_info["id"], color)
+                text_input = text_input.replace(entity_string, f'{{{str(entity_annotation)}}}', 1)
+                # Add the entity to JSON-LD data
+                entity_json_ld = combined_entity_info_dictionary[entity_string][1]
+                json_ld_data["mentions"].append(entity_json_ld)
         # Split the modified text_input into a list
         text_list = text_input.split("{")