Thomas De Decker committed on
Commit
aab5966
•
1 Parent(s): 8d04b0f

Update description + Fix highlight bugs

Browse files
Files changed (1) hide show
  1. app.py +49 -38
app.py CHANGED
@@ -17,9 +17,13 @@ def load_pipeline(chosen_model):
17
  return KeyphraseGenerationPipeline(chosen_model, truncation=True)
18
 
19
 
 
 
 
 
20
  def extract_keyphrases():
21
  st.session_state.keyphrases = pipe(st.session_state.input_text)
22
- st.session_state.history[f"run_{st.session_state.current_run_id}"] = {
23
  "run_id": st.session_state.current_run_id,
24
  "model": st.session_state.chosen_model,
25
  "text": st.session_state.input_text,
@@ -31,7 +35,7 @@ def extract_keyphrases():
31
  def get_annotated_text(text, keyphrases, color="#d294ff"):
32
  for keyphrase in keyphrases:
33
  text = re.sub(
34
- rf"({keyphrase})([^A-Za-z])",
35
  rf"$K:{keyphrases.index(keyphrase)}\2",
36
  text,
37
  flags=re.I,
@@ -83,17 +87,6 @@ def render_output(layout, runs, reverse=False):
83
  unsafe_allow_html=True,
84
  )
85
 
86
- if "generation" in run.get("model"):
87
- abstractive_keyphrases = [
88
- keyphrase
89
- for keyphrase in run.get("keyphrases")
90
- if keyphrase.lower() not in run.get("text").lower()
91
- ]
92
- layout.markdown(
93
- f"<p style=\"margin-bottom: 0rem\"><strong>Absent keyphrases:</strong> {', '.join(abstractive_keyphrases) if abstractive_keyphrases else 'None' }</p>",
94
- unsafe_allow_html=True,
95
- )
96
-
97
  result = get_annotated_text(run.get("text"), list(run.get("keyphrases")))
98
  layout.markdown(
99
  f"""
@@ -102,6 +95,20 @@ def render_output(layout, runs, reverse=False):
102
  """,
103
  unsafe_allow_html=True,
104
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  layout.markdown("---")
106
 
107
 
@@ -125,32 +132,36 @@ with open("css/style.css") as f:
125
  st.header("🔑 Keyphrase extraction/generation with Transformers")
126
 
127
  description = """
128
- Keyphrase extraction is a technique in text analysis where you extract the important keyphrases
129
- from a text. Since this is a time-consuming process, Artificial Intelligence is used to automate it.
130
- Currently, classical machine learning methods, that use statistics and linguistics, are widely used
131
- for the extraction process. The fact that these methods have been widely used in the community has
132
- the advantage that there are many easy-to-use libraries. Now with the recent innovations in
133
- NLP, transformers can be used to improve keyphrase extraction. Transformers also focus on the semantics and
134
- context of a document, which is quite an improvement.
135
-
136
- This space gives you the ability to test around with some keyphrase extraction and generation models.
137
- Keyphrase extraction models are transformers models fine-tuned as a token classification problem where
138
- the tokens in a text are annotated as B (Beginning of a keyphrase), I (Inside a keyphrases),
 
 
 
 
 
139
  and O (Outside a keyhprase).
140
 
141
- While keyphrase extraction can only extract keyphrases from a given text. Keyphrase generation models
142
- work a bit differently. Here you use an encoder-decoder model like BART to generate keyphrases from a given text.
143
- These models also have the ability to generate keyphrases, which are not present in the text 🤯.
 
144
 
145
- Do you want to see some magic 🧙‍♂️? Try it out yourself! 👇
146
  """
147
 
148
  st.write(description)
149
 
150
  with st.form("keyphrase-extraction-form"):
151
- selectbox_container, _ = st.columns(2)
152
-
153
- st.session_state.chosen_model = selectbox_container.selectbox(
154
  "Choose your model:", st.session_state.config.get("models")
155
  )
156
 
@@ -170,7 +181,8 @@ with st.form("keyphrase-extraction-form"):
170
  )
171
 
172
  with st.spinner("Extracting keyphrases..."):
173
- pressed = st.form_submit_button("Extract")
 
174
 
175
  if pressed and st.session_state.input_text != "":
176
  with st.spinner("Loading pipeline..."):
@@ -182,13 +194,12 @@ if pressed and st.session_state.input_text != "":
182
  elif st.session_state.input_text == "":
183
  st.error("The text input is empty 🙃 Please provide a text in the input field.")
184
 
185
- options = st.multiselect(
186
- "Specify the runs you want to see",
187
- st.session_state.history.keys(),
188
- format_func=lambda run_id: f"Run {run_id.split('_')[1]}",
189
- )
190
-
191
  if len(st.session_state.history.keys()) > 0:
 
 
 
 
 
192
  if options:
193
  render_output(
194
  st,
 
17
  return KeyphraseGenerationPipeline(chosen_model, truncation=True)
18
 
19
 
20
+ def generate_run_id():
21
+ return f"run_{re.sub('keyphrase-extraction-|keyphrase-generation-', '', st.session_state.chosen_model)}_{st.session_state.current_run_id}"
22
+
23
+
24
  def extract_keyphrases():
25
  st.session_state.keyphrases = pipe(st.session_state.input_text)
26
+ st.session_state.history[generate_run_id()] = {
27
  "run_id": st.session_state.current_run_id,
28
  "model": st.session_state.chosen_model,
29
  "text": st.session_state.input_text,
 
35
  def get_annotated_text(text, keyphrases, color="#d294ff"):
36
  for keyphrase in keyphrases:
37
  text = re.sub(
38
+ rf"({keyphrase})([^A-Za-z0-9])",
39
  rf"$K:{keyphrases.index(keyphrase)}\2",
40
  text,
41
  flags=re.I,
 
87
  unsafe_allow_html=True,
88
  )
89
 
 
 
 
 
 
 
 
 
 
 
 
90
  result = get_annotated_text(run.get("text"), list(run.get("keyphrases")))
91
  layout.markdown(
92
  f"""
 
95
  """,
96
  unsafe_allow_html=True,
97
  )
98
+ if "generation" in run.get("model"):
99
+ abstractive_keyphrases = [
100
+ (keyphrase, "KEY", "#FFA500")
101
+ for keyphrase in run.get("keyphrases")
102
+ if keyphrase.lower() not in run.get("text").lower()
103
+ ]
104
+ for i in range(len(abstractive_keyphrases)):
105
+ if i % 2 == 0:
106
+ abstractive_keyphrases.insert(i + 1, " ")
107
+
108
+ layout.markdown(
109
+ f"<p style=\"margin: 1rem 0 0 0\"><strong>Absent keyphrases:</strong> {get_annotated_html(*abstractive_keyphrases) if abstractive_keyphrases else 'None' }</p>",
110
+ unsafe_allow_html=True,
111
+ )
112
  layout.markdown("---")
113
 
114
 
 
132
  st.header("🔑 Keyphrase extraction/generation with Transformers")
133
 
134
  description = """
135
+ Keyphrase extraction is a technique in text analysis where you extract the important keyphrases from a document.
136
+ Thanks to these keyphrases humans can understand the content of a text very quickly and easily without reading
137
+ it completely. Keyphrase extraction was first done primarily by human annotators, who read the text in detail
138
+ and then wrote down the most important keyphrases. The disadvantage is that if you work with a lot of documents,
139
+ this process can take a lot of time ⏳.
140
+
141
+ Here is where Artificial Intelligence 🤖 comes in. Currently, classical machine learning methods, that use statistical
142
+ and linguistic features, are widely used for the extraction process. Now with deep learning, it is possible to capture
143
+ the semantic meaning of a text even better than these classical methods. Classical methods look at the frequency,
144
+ occurrence and order of words in the text, whereas these neural approaches can capture long-term semantic dependencies
145
+ and context of words in a text.
146
+
147
+ This space gives you the ability to extract keyphrases out of a custom text with transformer-based extraction and generation models.
148
+ Keyphrase extraction models are transformer models fine-tuned as a token classification problem where each word in the document
149
+ is classified as being part of a keyphrase or not.
150
+ The labels used during fine-tuning are B (Beginning of a keyphrase), I (Inside a keyphrases),
151
  and O (Outside a keyhprase).
152
 
153
+ While keyphrase extraction use encoder-only models to interpret the document. Keyphrase generation models
154
+ work a bit differently. Here you use an encoder-decoder model (e.g. BART, T5) to generate keyphrases from a given text.
155
+ These models also have the ability to generate keyphrases, which are not present in the text 🤯.
156
+ This can be really interesting in certain applications. For example if you want to make a news article more discoverable.
157
 
158
+ Try it out yourself! 👇
159
  """
160
 
161
  st.write(description)
162
 
163
  with st.form("keyphrase-extraction-form"):
164
+ st.session_state.chosen_model = st.selectbox(
 
 
165
  "Choose your model:", st.session_state.config.get("models")
166
  )
167
 
 
181
  )
182
 
183
  with st.spinner("Extracting keyphrases..."):
184
+ _, button_container = st.columns([7, 1])
185
+ pressed = button_container.form_submit_button("Extract")
186
 
187
  if pressed and st.session_state.input_text != "":
188
  with st.spinner("Loading pipeline..."):
 
194
  elif st.session_state.input_text == "":
195
  st.error("The text input is empty 🙃 Please provide a text in the input field.")
196
 
 
 
 
 
 
 
197
  if len(st.session_state.history.keys()) > 0:
198
+ options = st.multiselect(
199
+ "Specify the runs you want to see",
200
+ st.session_state.history.keys(),
201
+ format_func=lambda run_id: f"Run {run_id.split('_')[-1]}: {run_id.split('_')[1]}",
202
+ )
203
  if options:
204
  render_output(
205
  st,