davidmezzetti committed on
Commit
0a6ea65
1 Parent(s): daa1a9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -5
app.py CHANGED
@@ -16,8 +16,9 @@ import streamlit as st
16
 
17
  from txtai.embeddings import Documents, Embeddings
18
  from txtai.pipeline import Segmentation, Summary, Tabular, Translation
19
- from txtai.workflow import ServiceTask, Task, Workflow
20
 
 
21
 
22
  class Application:
23
  """
@@ -245,6 +246,7 @@ class Application:
245
  dict with component settings
246
  """
247
 
 
248
  options = {"type": component}
249
 
250
  st.markdown("---")
@@ -268,8 +270,12 @@ class Application:
268
  options["path"] = self.text("Embeddings model path", config, "path", "sentence-transformers/nli-mpnet-base-v2")
269
  options["upsert"] = self.boolean("Upsert", config, "upsert")
270
 
271
- elif component == "segmentation":
272
- st.markdown("**Segment** \n*Split text into semantic units*")
 
 
 
 
273
  options["sentences"] = self.boolean("Split sentences", config, "sentences")
274
  options["lines"] = self.boolean("Split lines", config, "lines")
275
  options["paragraphs"] = self.boolean("Split paragraphs", config, "paragraphs")
@@ -346,6 +352,10 @@ class Application:
346
  self.pipelines[wtype] = Tabular(**self.components["tabular"])
347
  tasks.append(Task(self.pipelines[wtype]))
348
 
 
 
 
 
349
  elif wtype == "translation":
350
  self.pipelines[wtype] = Translation()
351
  tasks.append(Task(lambda x: self.pipelines["translation"](x, **self.components["translation"])))
@@ -398,6 +408,10 @@ class Application:
398
  data[wtype] = component
399
  tasks.append({"action": wtype})
400
 
 
 
 
 
401
  elif wtype == "translation":
402
  data[wtype] = {}
403
  tasks.append({"action": wtype, "args": list(component.values())})
@@ -519,8 +533,8 @@ class Application:
519
  st.markdown("---")
520
 
521
  # Component configuration
522
- labels = {"segmentation": "segment", "translation": "translate"}
523
- components = ["embeddings", "segmentation", "service", "summary", "tabular", "translation"]
524
 
525
  selected, workflow = self.load(components)
526
  selected = st.multiselect("Select components", components, default=selected, format_func=lambda text: labels.get(text, text))
 
16
 
17
  from txtai.embeddings import Documents, Embeddings
18
  from txtai.pipeline import Segmentation, Summary, Tabular, Translation
19
+ from txtai.workflow import ServiceTask, Task, UrlTask, Workflow
20
 
21
+ from textractor import Textractor
22
 
23
  class Application:
24
  """
 
246
  dict with component settings
247
  """
248
 
249
+ # pylint: disable=R0912, R0915
250
  options = {"type": component}
251
 
252
  st.markdown("---")
 
270
  options["path"] = self.text("Embeddings model path", config, "path", "sentence-transformers/nli-mpnet-base-v2")
271
  options["upsert"] = self.boolean("Upsert", config, "upsert")
272
 
273
+ elif component in ("segmentation", "textractor"):
274
+ if component == "segmentation":
275
+ st.markdown("**Segment** \n*Split text into semantic units*")
276
+ else:
277
+ st.markdown("**Textract** \n*Extract text from documents*")
278
+
279
  options["sentences"] = self.boolean("Split sentences", config, "sentences")
280
  options["lines"] = self.boolean("Split lines", config, "lines")
281
  options["paragraphs"] = self.boolean("Split paragraphs", config, "paragraphs")
 
352
  self.pipelines[wtype] = Tabular(**self.components["tabular"])
353
  tasks.append(Task(self.pipelines[wtype]))
354
 
355
+ elif wtype == "textractor":
356
+ self.pipelines[wtype] = Textractor(**self.components["textract"])
357
+ tasks.append(UrlTask(self.pipelines[wtype]))
358
+
359
  elif wtype == "translation":
360
  self.pipelines[wtype] = Translation()
361
  tasks.append(Task(lambda x: self.pipelines["translation"](x, **self.components["translation"])))
 
408
  data[wtype] = component
409
  tasks.append({"action": wtype})
410
 
411
+ elif wtype == "textractor":
412
+ data[wtype] = component
413
+ tasks.append({"action": wtype, "task": "url"})
414
+
415
  elif wtype == "translation":
416
  data[wtype] = {}
417
  tasks.append({"action": wtype, "args": list(component.values())})
 
533
  st.markdown("---")
534
 
535
  # Component configuration
536
+ labels = {"segmentation": "segment", "textractor": "textract", "translation": "translate"}
537
+ components = ["embeddings", "segmentation", "service", "summary", "tabular", "textractor", "translation"]
538
 
539
  selected, workflow = self.load(components)
540
  selected = st.multiselect("Select components", components, default=selected, format_func=lambda text: labels.get(text, text))