Spaces:

GIZ
/

SDSN-demo

Running on CPU Upgrade

prashant committed on Nov 1, 2022

Commit

7de7bf4

1 Parent(s): 1a4b146

haystack adaptation

Files changed (1) hide show

udfPreprocess/sdg_classifier.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from tkinter import Text
 from haystack.nodes import TransformersDocumentClassifier
 from typing import List, Tuple
 import configparser
 import streamlit as st
@@ -27,7 +28,7 @@ def load_sdgClassifier():
     return doc_classifier
-def sdg_classification(paraList:List[Text])->Tuple[DataFrame,Series]:
     """
     Text-Classification on the list of texts provided. Classifier provides the
     most appropriate label for each text. these labels are in terms of if text
@@ -35,9 +36,9 @@ def sdg_classification(paraList:List[Text])->Tuple[DataFrame,Series]:
     Params
     ---------
-    paraList: List of paragraphs/text. The output of the Preprocessing Pipeline
-    contains this list of paragraphs in different formats; the simple List format
-    is being used here.
     Returns
     ----------
@@ -51,7 +52,7 @@ def sdg_classification(paraList:List[Text])->Tuple[DataFrame,Series]:
     classifier = load_sdgClassifier()
-    results = classifier.predict(paraList)
     labels_= [(l.meta['classification']['label'],
@@ -89,4 +90,4 @@ def runSDGPreprocessingPipeline()->List[Text]:
                                             "split_by": split_by, \
                                             "split_length":split_length}})
-    return output_sdg_pre['paraList']

 from tkinter import Text
 from haystack.nodes import TransformersDocumentClassifier
+from haystack.schema import Document
 from typing import List, Tuple
 import configparser
 import streamlit as st
     return doc_classifier
+def sdg_classification(haystackdoc:List[Document])->Tuple[DataFrame,Series]:
     """
     Text-Classification on the list of texts provided. Classifier provides the
     most appropriate label for each text. these labels are in terms of if text
     Params
     ---------
+    haystackdoc: List of Haystack Documents. The output of the Preprocessing
+    Pipeline contains the list of paragraphs in different formats; here the list
+    of Haystack Documents is used.
     Returns
     ----------
     classifier = load_sdgClassifier()
+    results = classifier.predict(haystackdoc)
     labels_= [(l.meta['classification']['label'],
                                             "split_by": split_by, \
                                             "split_length":split_length}})
+    return output_sdg_pre['documents']