cyberandy committed on
Commit
d24252f
•
1 Parent(s): ae4b928

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -21
app.py CHANGED
@@ -19,11 +19,27 @@ st.set_page_config(
19
 
20
  # Sidebar
21
  st.sidebar.image("logo-wordlift.png")
22
- language_options = {"English", "German"}
23
  selected_language = st.sidebar.selectbox("Select the Language", list(language_options), index=0)
24
 
25
  # Based on selected language, configure model, entity set, and citation options
26
- if selected_language != "German":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  model_options = ["aida_model", "wikipedia_model_with_numbers"]
28
  entity_set_options = ["wikidata", "wikipedia"]
29
 
@@ -41,22 +57,6 @@ if selected_language != "German":
41
 
42
  with st.sidebar.expander('Citations'):
43
  st.markdown(refined_citation)
44
- else:
45
- selected_model_name = None
46
- selected_entity_set = None
47
-
48
- entity_fishing_citation = """
49
- @misc{entity-fishing,
50
- title = {entity-fishing},
51
- publisher = {GitHub},
52
- year = {2016--2023},
53
- archivePrefix = {swh},
54
- eprint = {1:dir:cb0ba3379413db12b0018b7c3af8d0d2d864139c}
55
- }
56
- """
57
-
58
- with st.sidebar.expander('Citations'):
59
- st.markdown(entity_fishing_citation)
60
 
61
  @st.cache_resource # 👈 Add the caching decorator
62
  def load_model(selected_language, model_name=None, entity_set=None):
@@ -66,6 +66,12 @@ def load_model(selected_language, model_name=None, entity_set=None):
66
  nlp_model_de.add_pipe("entityfishing")
67
 
68
  return nlp_model_de
 
 
 
 
 
 
69
  else:
70
  # Load the pretrained model for other languages
71
  refined_model = Refined.from_pretrained(model_name=model_name, entity_set=entity_set)
@@ -101,20 +107,19 @@ entities_map = {}
101
  entities_data = {}
102
 
103
  if text_input:
104
- if selected_language == "German":
105
  doc_de = model(text_input)
106
  entities = [(ent.text, ent.label_, ent._.kb_qid, ent._.url_wikidata) for ent in doc_de.ents]
107
  for entity in entities:
108
  entity_string, entity_type, wikidata_id, wikidata_url = entity
109
  if wikidata_url:
110
- # Ensure correct format for the German model
111
  formatted_wikidata_url = wikidata_url.replace("https://www.wikidata.org/wiki/", "http://www.wikidata.org/entity/")
112
  entities_map[entity_string] = {"id": wikidata_id, "link": formatted_wikidata_url}
113
  entity_data = get_entity_data(formatted_wikidata_url)
114
 
115
  if entity_data is not None:
116
  entities_data[entity_string] = entity_data
117
-
118
  else:
119
  entities = model.process_text(text_input)
120
 
 
19
 
20
  # Sidebar
21
  st.sidebar.image("logo-wordlift.png")
22
+ language_options = {"English", "English - spaCy", "German"}
23
  selected_language = st.sidebar.selectbox("Select the Language", list(language_options), index=0)
24
 
25
  # Based on selected language, configure model, entity set, and citation options
26
+ if selected_language == "German" or selected_language == "English - spaCy":
27
+ selected_model_name = None
28
+ selected_entity_set = None
29
+
30
+ entity_fishing_citation = """
31
+ @misc{entity-fishing,
32
+ title = {entity-fishing},
33
+ publisher = {GitHub},
34
+ year = {2016--2023},
35
+ archivePrefix = {swh},
36
+ eprint = {1:dir:cb0ba3379413db12b0018b7c3af8d0d2d864139c}
37
+ }
38
+ """
39
+
40
+ with st.sidebar.expander('Citations'):
41
+ st.markdown(entity_fishing_citation)
42
+ else:
43
  model_options = ["aida_model", "wikipedia_model_with_numbers"]
44
  entity_set_options = ["wikidata", "wikipedia"]
45
 
 
57
 
58
  with st.sidebar.expander('Citations'):
59
  st.markdown(refined_citation)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  @st.cache_resource # 👈 Add the caching decorator
62
  def load_model(selected_language, model_name=None, entity_set=None):
 
66
  nlp_model_de.add_pipe("entityfishing")
67
 
68
  return nlp_model_de
69
+ elif selected_language == "English":
70
+ # Load English-specific model
71
+ nlp_model_en = spacy.load("en_core_web_sm")
72
+ nlp_model_en.add_pipe("entityfishing")
73
+
74
+ return nlp_model_en
75
  else:
76
  # Load the pretrained model for other languages
77
  refined_model = Refined.from_pretrained(model_name=model_name, entity_set=entity_set)
 
107
  entities_data = {}
108
 
109
  if text_input:
110
+ if selected_language in ["German", "English - spaCy"]::
111
  doc_de = model(text_input)
112
  entities = [(ent.text, ent.label_, ent._.kb_qid, ent._.url_wikidata) for ent in doc_de.ents]
113
  for entity in entities:
114
  entity_string, entity_type, wikidata_id, wikidata_url = entity
115
  if wikidata_url:
116
+ # Ensure correct format for the German and English model
117
  formatted_wikidata_url = wikidata_url.replace("https://www.wikidata.org/wiki/", "http://www.wikidata.org/entity/")
118
  entities_map[entity_string] = {"id": wikidata_id, "link": formatted_wikidata_url}
119
  entity_data = get_entity_data(formatted_wikidata_url)
120
 
121
  if entity_data is not None:
122
  entities_data[entity_string] = entity_data
 
123
  else:
124
  entities = model.process_text(text_input)
125