Spaces:
GIZ
/
Running on CPU Upgrade

prashant committed on
Commit
7de7bf4
1 Parent(s): 1a4b146

haystack adaptation

Browse files
Files changed (1) hide show
  1. udfPreprocess/sdg_classifier.py +7 -6
udfPreprocess/sdg_classifier.py CHANGED
@@ -1,5 +1,6 @@
1
  from tkinter import Text
2
  from haystack.nodes import TransformersDocumentClassifier
 
3
  from typing import List, Tuple
4
  import configparser
5
  import streamlit as st
@@ -27,7 +28,7 @@ def load_sdgClassifier():
27
  return doc_classifier
28
 
29
 
30
- def sdg_classification(paraList:List[Text])->Tuple[DataFrame,Series]:
31
  """
32
  Text-Classification on the list of texts provided. Classifier provides the
33
  most appropriate label for each text. these labels are in terms of if text
@@ -35,9 +36,9 @@ def sdg_classification(paraList:List[Text])->Tuple[DataFrame,Series]:
35
 
36
  Params
37
  ---------
38
- paraList: List of paragrpahs/text. The output of Preprocessing Pipeline
39
- contains this list of paragraphs in different format, the simple List format
40
- is being used here.
41
 
42
  Returns
43
  ----------
@@ -51,7 +52,7 @@ def sdg_classification(paraList:List[Text])->Tuple[DataFrame,Series]:
51
 
52
 
53
  classifier = load_sdgClassifier()
54
- results = classifier.predict(paraList)
55
 
56
 
57
  labels_= [(l.meta['classification']['label'],
@@ -89,4 +90,4 @@ def runSDGPreprocessingPipeline()->List[Text]:
89
  "split_by": split_by, \
90
  "split_length":split_length}})
91
 
92
- return output_sdg_pre['paraList']
 
1
  from tkinter import Text
2
  from haystack.nodes import TransformersDocumentClassifier
3
+ from haystack.schema import Document
4
  from typing import List, Tuple
5
  import configparser
6
  import streamlit as st
 
28
  return doc_classifier
29
 
30
 
31
+ def sdg_classification(haystackdoc:List[Document])->Tuple[DataFrame,Series]:
32
  """
33
  Text-Classification on the list of texts provided. Classifier provides the
34
  most appropriate label for each text. these labels are in terms of if text
 
36
 
37
  Params
38
  ---------
39
+ haystackdoc: List of haystack Documents. The output of Preprocessing Pipeline
40
+ contains the list of paragraphs in different formats; here the list of
41
+ Haystack Documents is used.
42
 
43
  Returns
44
  ----------
 
52
 
53
 
54
  classifier = load_sdgClassifier()
55
+ results = classifier.predict(haystackdoc)
56
 
57
 
58
  labels_= [(l.meta['classification']['label'],
 
90
  "split_by": split_by, \
91
  "split_length":split_length}})
92
 
93
+ return output_sdg_pre['documents']